[Mlir-commits] [mlir] 25bc684 - [mlir][linalg][bufferize] Always bufferize in-place with "out" operands by default

Matthias Springer llvmlistbot at llvm.org
Thu Feb 24 02:58:15 PST 2022


Author: Matthias Springer
Date: 2022-02-24T19:58:05+09:00
New Revision: 25bc684603b1aa000b92aba2e4d4fd0bf524eb02

URL: https://github.com/llvm/llvm-project/commit/25bc684603b1aa000b92aba2e4d4fd0bf524eb02
DIFF: https://github.com/llvm/llvm-project/commit/25bc684603b1aa000b92aba2e4d4fd0bf524eb02.diff

LOG: [mlir][linalg][bufferize] Always bufferize in-place with "out" operands by default

In D115022, we introduced an optimization where OpResults of a `linalg.generic` may bufferize in-place with an "in" OpOperand if the corresponding "out" OpOperand is not used in the computation.

This optimization can lead to unexpected behavior if the newly chosen OpOperand is in the same alias set as another OpOperand (that is used in the computation). In that case, the newly chosen OpOperand must bufferize out-of-place. This can be confusing to users, as always choosing the "out" OpOperand (regardless of whether it is used) would be expected when having the notion of "destination-passing style" in mind.

With this change, we go back to always bufferizing in-place with "out" OpOperands by default, but letting users override the behavior with a bufferization option.

Differential Revision: https://reviews.llvm.org/D120182

Added: 
    mlir/test/Dialect/Linalg/comprehensive-module-bufferize-aliasing-in.mlir
    mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-aliasing-in.mlir

Modified: 
    mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
    mlir/include/mlir/Dialect/Linalg/Passes.td
    mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
    mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
    mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
    mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 433da0d953086..ae03c4d36a192 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -202,6 +202,20 @@ struct BufferizationOptions {
   /// For debugging only. Should be used together with `testAnalysisOnly`.
   bool printConflicts = false;
 
+  /// If set to `true`, `getAliasingOpResult` will return the corresponding
+  /// "out"/"dest" OpOperand for every op that has the notion of an "out"/"dest"
+  /// operand. I.e., the aliasing OpOperand of the i-th tensor OpResult is
+  /// usually the i-th "out" tensor OpOperand. This is in line with
+  /// destination-passing style and the default behavior. Op interface
+  /// implementations must follow this contract to avoid surprising behavior.
+  ///
+  /// If set to `false`, BufferizableOpInterface implementations can try to be
+  /// smart and choose to alias with "in" operands or other operands. E.g., the
+  /// result of a `linalg.generic` op could bufferize in-place with an "in"
+  /// OpOperand if the corresponding "out" operand is not used within the
+  /// computation. Whether this pays off or not can be very input IR-specific.
+  bool alwaysAliasingWithDest = true;
+
   /// Buffer alignment for new memory allocations.
   unsigned int bufferAlignment = 128;
 

diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index 44fac0cb46914..dc14011c8fd13 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -49,6 +49,10 @@ def LinalgComprehensiveModuleBufferize :
     Option<"allowUnknownOps", "allow-unknown-ops", "bool",
            /*default=*/"false",
            "Allows unknown (not bufferizable) ops in the input IR.">,
+    Option<"alwaysAliasingWithDest", "always-aliasing-with-dest", "bool",
+            /*default=*/"true",
            "Tensor OpResults cannot bufferize in-place with OpOperands other "
            "than out or dest OpOperands (if the op has a notion of such operands)">,
     Option<"useAlloca", "use-alloca", "bool",
            /*default=*/"false",
            "Use stack allocations for memrefs (for testing purposes only)">,

diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
index f08be273248fe..799c13726091c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -164,8 +164,7 @@ struct LinalgOpInterface
 
   bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                                const BufferizationState &state) const {
-    // Operand is written to if it has an aliasing OpResult. For more details,
-    // see `computeAliasingPairs`.
+    // Operand is written to if it has an aliasing OpResult.
     auto bufferizableOp = cast<BufferizableOpInterface>(op);
     return !bufferizableOp.getAliasingOpResult(opOperand, state).empty();
   }
@@ -175,6 +174,12 @@ struct LinalgOpInterface
                        const BufferizationState &state) const {
     auto genericOp = cast<linalg::LinalgOp>(op);
 
+    // By default, the i-th OpResult may alias with the i-th "out" tensor.
+    if (state.getOptions().alwaysAliasingWithDest)
+      return {genericOp.getOutputOperand(opResult.getResultNumber())};
+
+    // We can try to be smart and alias in-place with an "in" tensor if the
+    // corresponding "out" tensor is not used in the computation.
     // Aliasing OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
     DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
     for (OpOperand *opOperand : genericOp.getInputAndOutputOperands())
@@ -188,6 +193,14 @@ struct LinalgOpInterface
                       const BufferizationState &state) const {
     auto genericOp = cast<linalg::LinalgOp>(op);
 
+    // By default, the i-th "out" tensor may alias with the i-th OpResult.
+    if (state.getOptions().alwaysAliasingWithDest) {
+      if (genericOp.isOutputTensor(&opOperand))
+        return {genericOp.getTiedOpResult(&opOperand)};
+      return {};
+    }
+
+    // We can try to be smart. See comment in `getAliasingOpOperand`.
     // Aliasing OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
     DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
     if (!pairs.count(&opOperand))

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index ed5067c0db220..d7869b4a9f417 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -97,6 +97,7 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
     opt.fullyDynamicLayoutMaps = fullyDynamicLayoutMaps;
     opt.printConflicts = printConflicts;
     opt.testAnalysisOnly = testAnalysisOnly;
+    opt.alwaysAliasingWithDest = alwaysAliasingWithDest;
     if (initTensorElimination) {
       opt.addPostAnalysisStep(insertSliceAnchoredInitTensorEliminationStep);
     }

diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-aliasing-in.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-aliasing-in.mlir
new file mode 100644
index 0000000000000..4438176ef4ecd
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-aliasing-in.mlir
@@ -0,0 +1,75 @@
+// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-memref always-aliasing-with-dest=0" -split-input-file | FileCheck %s
+
+// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
+//  CHECK-SAME:     %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
+func @linalg_op_bufferizes_inplace_with_input(
+    %t1: tensor<?x?xf32> {linalg.inplaceable = true},
+    %t2: tensor<?xf32> {linalg.inplaceable = false},
+    %t3: tensor<?x?xf32> {linalg.inplaceable = false},
+    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
+  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t1]] : {{.*}})
+  %r = linalg.generic {
+    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                     affine_map<(d0, d1) -> (d1)>,
+                     affine_map<(d0, d1)-> (d0, d1)>],
+    iterator_types = ["parallel", "parallel"]}
+    ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
+    outs(%t3 : tensor<?x?xf32>) {
+      ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
+        %add = arith.addf %arg0, %arg1 : f32
+        linalg.yield %add : f32
+    } -> tensor<?x?xf32>
+  return %r : tensor<?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @linalg_op_bufferizes_out_of_place_with_input
+//  CHECK-SAME:     %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
+func @linalg_op_bufferizes_out_of_place_with_input(
+    %t1: tensor<?x?xf32> {linalg.inplaceable = false},
+    %t2: tensor<?xf32> {linalg.inplaceable = false},
+    %t3: tensor<?x?xf32> {linalg.inplaceable = false},
+    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
+  // CHECK: %[[alloc:.*]] = memref.alloc
+  // CHECK: memref.copy %[[t1]], %[[alloc]]
+  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[alloc]] : {{.*}})
+  %r = linalg.generic {
+    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                     affine_map<(d0, d1) -> (d1)>,
+                     affine_map<(d0, d1)-> (d0, d1)>],
+    iterator_types = ["parallel", "parallel"]}
+    ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
+    outs(%t3 : tensor<?x?xf32>) {
+      ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
+        %add = arith.addf %arg0, %arg1 : f32
+        linalg.yield %add : f32
+    } -> tensor<?x?xf32>
+  // CHECK: return %[[alloc]]
+  return %r : tensor<?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @linalg_op_output_cannot_alias_with_input
+//  CHECK-SAME:     %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
+func @linalg_op_output_cannot_alias_with_input(
+    %t1: tensor<?x?xf32> {linalg.inplaceable = true},
+    %t2: tensor<?xf32> {linalg.inplaceable = false},
+    %t3: tensor<?x?xf32> {linalg.inplaceable = true},
+    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
+  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
+  %r = linalg.generic {
+    indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
+                     affine_map<(d0, d1) -> (d1)>,
+                     affine_map<(d0, d1)-> (d0, d1)>],
+    iterator_types = ["parallel", "parallel"]}
+    ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
+    outs(%t3 : tensor<?x?xf32>) {
+      ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
+        %add = arith.addf %arg0, %arg1 : f32
+        linalg.yield %add : f32
+    } -> tensor<?x?xf32>
+  return %r : tensor<?x?xf32>
+}
+

diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-aliasing-in.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-aliasing-in.mlir
new file mode 100644
index 0000000000000..5b9c3af74074e
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-aliasing-in.mlir
@@ -0,0 +1,83 @@
+// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-memref always-aliasing-with-dest=0" -split-input-file | FileCheck %s
+
+// This is a test case for alwaysAliasingWithDest = 0. In that case, an OpResult
+// may bufferize in-place with an "in" OpOperand or any non-"out" OpOperand.
+
+
+#accesses = [
+  affine_map<(i) -> (i)>,
+  affine_map<(i) -> (i)>,
+  affine_map<(i) -> (i)>
+]
+#trait = {
+  indexing_maps = #accesses,
+  iterator_types = ["parallel"]
+}
+
+// CHECK-LABEL: func @linalg_op_same_out_tensors(
+func @linalg_op_same_out_tensors(
+    %t1: tensor<?xf32> {linalg.inplaceable = true},
+// CHECK-SAME:          bufferization.access = "read-write"
+    %t2: tensor<?xf32> {linalg.inplaceable = true})
+// CHECK-SAME:          bufferization.access = "write"
+  -> (tensor<?xf32>, tensor<?xf32>){
+
+  // %1 and %2 are not used in the computation, so the two OpResults do not
+  // necessarily have to bufferize in-place with the two "out" OpOperands. They
+  // bufferize in-place with the first and second OpOperand (one of which is an
+  // "in" OpOperand).
+  //      CHECK: linalg.generic
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
+  %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
+                               outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
+      ^bb(%0: f32, %1: f32, %2 : f32) :
+        linalg.yield %0, %0 : f32, f32
+    } -> (tensor<?xf32>, tensor<?xf32>)
+
+  //      CHECK: return
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1]
+  return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+#accesses = [
+  affine_map<(i) -> (i)>,
+  affine_map<(i) -> (i)>,
+  affine_map<(i) -> (i)>,
+  affine_map<(i) -> (i)>
+]
+#trait = {
+  indexing_maps = #accesses,
+  iterator_types = ["parallel"]
+}
+
+// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
+func @linalg_op_same_out_tensors_2(
+    %t1: tensor<?xf32> {linalg.inplaceable = true},
+// CHECK-SAME:          bufferization.access = "read-write"
+    %t2: tensor<?xf32> {linalg.inplaceable = true})
+// CHECK-SAME:          bufferization.access = "write"
+        -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
+
+  // %1, %2 and %3 are not used in the computation, so the three OpResults do
+  // not necessarily have to bufferize in-place with the three "out" OpOperands.
+  // They bufferize in-place with the first, second and third OpOperand (one of
+  // which is an "in" OpOperand).
+  // In contrast to the previous test case, two of the chosen OpOperands are the
+  // same (aliasing) SSA value, which is why one of them must bufferize
+  // out-of-place.
+  //      CHECK: linalg.generic
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
+  %o:3 = linalg.generic #trait
+          ins(%t1 : tensor<?xf32>)
+          outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
+      ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
+        linalg.yield %0, %0, %0 : f32, f32, f32
+    } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
+
+  //      CHECK: return
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
+  return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
+}
+

diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
index d0d60bb4e94ff..248a966a7a624 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
@@ -990,13 +990,13 @@ func @ip(%t: tensor<10x20xf32> {linalg.inplaceable = true},
 // CHECK-LABEL: func @linalg_op_same_out_tensors(
 func @linalg_op_same_out_tensors(
     %t1: tensor<?xf32> {linalg.inplaceable = true},
-// CHECK-SAME:          bufferization.access = "read-write"
+// CHECK-SAME:          bufferization.access = "read"
     %t2: tensor<?xf32> {linalg.inplaceable = true})
 // CHECK-SAME:          bufferization.access = "write"
   -> (tensor<?xf32>, tensor<?xf32>){
 
   //      CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]
   %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
                                outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
       ^bb(%0: f32, %1: f32, %2 : f32) :
@@ -1004,7 +1004,7 @@ func @linalg_op_same_out_tensors(
     } -> (tensor<?xf32>, tensor<?xf32>)
 
   //      CHECK: return
-  // CHECK-SAME: __equivalent_func_args__ = [0, 1]
+  // CHECK-SAME: __equivalent_func_args__ = [1, -1]
   return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
 }
 
@@ -1024,13 +1024,13 @@ func @linalg_op_same_out_tensors(
 // CHECK-LABEL: func @linalg_op_same_out_tensors_2(
 func @linalg_op_same_out_tensors_2(
     %t1: tensor<?xf32> {linalg.inplaceable = true},
-// CHECK-SAME:          bufferization.access = "read-write"
+// CHECK-SAME:          bufferization.access = "read"
     %t2: tensor<?xf32> {linalg.inplaceable = true})
 // CHECK-SAME:          bufferization.access = "write"
         -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
 
   //      CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false", "false"]
   %o:3 = linalg.generic #trait
           ins(%t1 : tensor<?xf32>)
           outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
@@ -1039,7 +1039,7 @@ func @linalg_op_same_out_tensors_2(
     } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
 
   //      CHECK: return
-  // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
+  // CHECK-SAME: __equivalent_func_args__ = [1, -1, -1]
   return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
 }
 

diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index ba6da08a097a8..b76ece00c3123 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1176,63 +1176,12 @@ func @gather_like(
 //  CHECK-SAME:     %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
 func @linalg_op_bufferizes_inplace_with_input(
     %t1: tensor<?x?xf32> {linalg.inplaceable = true},
-    %t2: tensor<?xf32> {linalg.inplaceable = false},
-    %t3: tensor<?x?xf32> {linalg.inplaceable = false},
-    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
-  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t1]] : {{.*}})
-  %r = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d1)>,
-                     affine_map<(d0, d1)-> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]}
-    ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
-    outs(%t3 : tensor<?x?xf32>) {
-      ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
-        %add = arith.addf %arg0, %arg1 : f32
-        linalg.yield %add : f32
-    } -> tensor<?x?xf32>
-  return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_bufferizes_out_of_place_with_input
-//  CHECK-SAME:     %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func @linalg_op_bufferizes_out_of_place_with_input(
-    %t1: tensor<?x?xf32> {linalg.inplaceable = false},
-    %t2: tensor<?xf32> {linalg.inplaceable = false},
-    %t3: tensor<?x?xf32> {linalg.inplaceable = false},
-    %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
-  // CHECK: %[[alloc:.*]] = memref.alloc
-  // CHECK: memref.copy %[[t1]], %[[alloc]]
-  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[alloc]] : {{.*}})
-  %r = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d1)>,
-                     affine_map<(d0, d1)-> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]}
-    ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
-    outs(%t3 : tensor<?x?xf32>) {
-      ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
-        %add = arith.addf %arg0, %arg1 : f32
-        linalg.yield %add : f32
-    } -> tensor<?x?xf32>
-  // CHECK: return %[[alloc]]
-  return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_output_cannot_alias_with_input
-//  CHECK-SAME:     %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func @linalg_op_output_cannot_alias_with_input(
-    %t1: tensor<?x?xf32> {linalg.inplaceable = true},
-    %t2: tensor<?xf32> {linalg.inplaceable = false},
+    %t2: tensor<?xf32> {linalg.inplaceable = true},
     %t3: tensor<?x?xf32> {linalg.inplaceable = true},
     %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
   // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
   %r = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
+    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                      affine_map<(d0, d1) -> (d1)>,
                      affine_map<(d0, d1)-> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}


        


More information about the Mlir-commits mailing list