[Mlir-commits] [mlir] ad2e635 - [mlir][linalg][bufferize] Remove always-aliasing-with-dest option
Matthias Springer
llvmlistbot at llvm.org
Wed Jun 15 00:57:08 PDT 2022
Author: Matthias Springer
Date: 2022-06-15T09:56:53+02:00
New Revision: ad2e635faea75cbb15f108dc219c79bdd5638023
URL: https://github.com/llvm/llvm-project/commit/ad2e635faea75cbb15f108dc219c79bdd5638023
DIFF: https://github.com/llvm/llvm-project/commit/ad2e635faea75cbb15f108dc219c79bdd5638023.diff
LOG: [mlir][linalg][bufferize] Remove always-aliasing-with-dest option
This flag was introduced for a use case in IREE, but it is no longer needed.
Differential Revision: https://reviews.llvm.org/D126965
Added:
Modified:
mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
Removed:
mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
################################################################################
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 7c84b6fd3d059..72ec758606d92 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -293,20 +293,6 @@ struct BufferizationOptions {
/// For debugging only. Should be used together with `testAnalysisOnly`.
bool printConflicts = false;
- /// If set to `true`, `getAliasingOpOperand` will return the corresponding
- /// "out"/"dest" OpOperand for every op that has the notion of an "out"/"dest"
- /// operand. I.e., the aliasing OpOperand of the i-th tensor OpResult is
- /// usually the i-th "out" tensor OpOperand. This is in line with
- /// destination-passing style and the default behavior. Op interface
- /// implementations must follow this contract to avoid surprising behavior.
- ///
- /// If set to `false`, BufferizableOpInterface implementations can try to be
- /// smart and choose to alias with "in" operands or other operands. E.g., the
- /// result of a `linalg.generic` op could bufferize in-place with an "in"
- /// OpOperand if the corresponding "out" operand is not used within the
- /// computation. Whether this pays off or not can be very input IR-specific.
- bool alwaysAliasingWithDest = true;
-
/// Buffer alignment for new memory allocations.
unsigned int bufferAlignment = 128;
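For context, the destination-passing-style contract that is now the only behavior looks like this in IR (a minimal sketch adapted from the tests removed below; %t1 and %t2 are "in" operands, %t3 is the "out" operand, all names illustrative):

    %r = linalg.generic {
        indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                         affine_map<(d0, d1) -> (d1)>,
                         affine_map<(d0, d1) -> (d0, d1)>],
        iterator_types = ["parallel", "parallel"]}
        ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
        outs(%t3 : tensor<?x?xf32>) {
      ^bb0(%a : f32, %b : f32, %c : f32):
        // %c (from the "out" tensor %t3) is not used in the payload.
        %add = arith.addf %a, %b : f32
        linalg.yield %add : f32
    } -> tensor<?x?xf32>

With the option gone, %r may bufferize in-place only with %t3. The removed `false` mode would instead have allowed %r to alias the identically-indexed "in" operand %t1, because %t3 is unused in the payload.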
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 16426744cc77a..1fa4090a0e133 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -270,10 +270,6 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
Option<"allowUnknownOps", "allow-unknown-ops", "bool",
/*default=*/"false",
"Allows unknown (not bufferizable) ops in the input IR.">,
- Option<"alwaysAliasingWithDest", "always-aliasing-with-dest", "bool",
- /*default=*/"true",
- "Tensor OpResult cannot bufferize inplace OpOperands other than "
- "out/dest OpOperands (if the op has such operands; experimental)">,
Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
/*default=*/"0",
"Test only: Analyze ops in random order with a given seed (fuzzer)">,
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index f3a8d5306de79..6b6eaf15c55ac 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -182,7 +182,6 @@ struct OneShotBufferizePass
// pass.
opt.allowReturnAllocs = allowReturnAllocs;
opt.allowUnknownOps = allowUnknownOps;
- opt.alwaysAliasingWithDest = alwaysAliasingWithDest;
opt.analysisFuzzerSeed = analysisFuzzerSeed;
opt.createDeallocs = createDeallocs;
opt.functionBoundaryTypeConversion =
diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
index a924e8c8c2640..bccae3b8ba69b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -86,69 +86,6 @@ static LogicalResult bufferizeLinalgOp(RewriterBase &rewriter, LinalgOp op,
return success();
}
-/// Linalg OpResults usually bufferize inplace with their tied (output)
-/// OpOperands. However, if an output OpOperand is not used in the computation,
-/// it is better to bufferize inplace with an actually used input OpOperand;
-/// less memory will be touched that way.
-///
-/// Example:
-/// O(i, j) = A(i, j) + B(j) --> bufferizes inplace to: A(i, j) += B(j)
-///
-/// O(i, j) = A(j, i) + B(j) --> cannot bufferize inplace with A because
-/// indexing maps are not identical
-///
-/// O(i, j) += A(i, j) + B(j) --> Output is used in computation.
-/// This could bufferize inplace with A:
-/// A(i, j) += O(i, j) + B(j)
-/// However, we choose to bufferize inplace with O here, as there is no clear
-/// benefit of choosing A. TODO: We may want to consider both options and make
-/// an informed decision during analysis in the future.
-static DenseMap<OpOperand *, OpResult> computeAliasingPairs(LinalgOp op) {
- DenseMap<OpOperand *, OpResult> mapping;
- for (OpResult opResult : op->getOpResults()) {
- OpOperand *tiedOperand =
- op.getOutputTensorOperands()[opResult.getResultNumber()];
- AffineMap outputIndexingMap = op.getTiedIndexingMap(tiedOperand);
- bool onlyParallelIterators = op.getNumParallelLoops() == op.getNumLoops();
- bool tiedOperandUsed = op.payloadUsesValueFromOperand(tiedOperand);
-
- // If the output arg is used in the computation or at least one iterator is
- // not parallel, try to bufferize inplace with the corresponding output
- // tensor.
- if (tiedOperandUsed || !onlyParallelIterators) {
- mapping[tiedOperand] = opResult;
- continue;
- }
-
- // Otherwise, try to bufferize inplace with one of the inputs.
- OpOperand *chosenOperand = nullptr;
- for (OpOperand *opOperand : op.getInputTensorOperands()) {
- if (opOperand->get().getType() != opResult.getType())
- continue;
- if (!op.payloadUsesValueFromOperand(opOperand))
- continue;
- if (op.getTiedIndexingMap(opOperand) != outputIndexingMap)
- continue;
- // Make sure that no other OpResult already aliases with this OpOperand.
- if (mapping.count(opOperand))
- continue;
- assert(op.getTiedIndexingMap(opOperand).isProjectedPermutation() &&
- "expected projected permutation");
- chosenOperand = opOperand;
- break;
- }
-
- // No suitable input tensor found. Use output tensor.
- // TODO: This OpResult could bufferize inplace with OpOperands that have the
- // correct type, even if they are not used inside the computation.
- if (!chosenOperand)
- chosenOperand = tiedOperand;
-
- mapping[chosenOperand] = opResult;
- }
- return mapping;
-}
-
/// Bufferization of linalg.generic. Replace with a new linalg.generic that
/// operates entirely on memrefs.
template <typename OpTy>
@@ -174,37 +111,18 @@ struct LinalgOpInterface
const AnalysisState &state) const {
auto genericOp = cast<linalg::LinalgOp>(op);
- // By default, the i-th OpResult may alias with the i-th "out" tensor.
- if (state.getOptions().alwaysAliasingWithDest)
- return {genericOp.getOutputOperand(opResult.getResultNumber())};
-
- // We can try to be smart and alias in-place with an "in" tensor if the
- // corresponding "out" tensor is not used in the computation.
- // Aliasing OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
- DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
- for (OpOperand *opOperand : genericOp.getInputAndOutputOperands())
- if (pairs[opOperand] == opResult)
- return {opOperand};
- return {};
+ // The i-th OpResult may alias with the i-th "out" tensor.
+ return {genericOp.getOutputOperand(opResult.getResultNumber())};
}
SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
auto genericOp = cast<linalg::LinalgOp>(op);
- // By default, the i-th "out" tensor may alias with the i-th OpResult.
- if (state.getOptions().alwaysAliasingWithDest) {
- if (genericOp.isOutputTensor(&opOperand))
- return {genericOp.getTiedOpResult(&opOperand)};
- return {};
- }
-
- // We can try to be smart. See comment in `getAliasingOpOperand`.
- // Aliasing OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
- DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
- if (!pairs.count(&opOperand))
- return {};
- return {pairs[&opOperand]};
+ // The i-th "out" tensor may alias with the i-th OpResult.
+ if (genericOp.isOutputTensor(&opOperand))
+ return {genericOp.getTiedOpResult(&opOperand)};
+ return {};
}
BufferRelation bufferRelation(Operation *op, OpResult opResult,
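A hedged usage sketch of the simplified contract (this assumes the `AnalysisState::getAliasingOpOperand` helper matching the interface method above; the loop itself is illustrative and not part of this commit):

    // After this change, the aliasing relation for linalg ops is purely
    // positional: OpResult #i may alias only the i-th "out" OpOperand.
    auto linalgOp = cast<linalg::LinalgOp>(op);
    for (OpResult opResult : linalgOp->getOpResults()) {
      SmallVector<OpOperand *> aliasing = state.getAliasingOpOperand(opResult);
      assert(aliasing.size() == 1 &&
             aliasing.front() ==
                 linalgOp.getOutputOperand(opResult.getResultNumber()) &&
             "expected positional out-operand aliasing");
    }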
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
deleted file mode 100644
index 6d475bac61a08..0000000000000
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs always-aliasing-with-dest=0" -split-input-file | FileCheck %s
-
-// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
-// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func.func @linalg_op_bufferizes_inplace_with_input(
- %t1: tensor<?x?xf32> {bufferization.writable = true},
- %t2: tensor<?xf32> {bufferization.writable = false},
- %t3: tensor<?x?xf32> {bufferization.writable = false},
- %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t1]] : {{.*}})
- %r = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d1)>,
- affine_map<(d0, d1)-> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%t3 : tensor<?x?xf32>) {
- ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
- %add = arith.addf %arg0, %arg1 : f32
- linalg.yield %add : f32
- } -> tensor<?x?xf32>
- return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_bufferizes_out_of_place_with_input
-// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func.func @linalg_op_bufferizes_out_of_place_with_input(
- %t1: tensor<?x?xf32> {bufferization.writable = false},
- %t2: tensor<?xf32> {bufferization.writable = false},
- %t3: tensor<?x?xf32> {bufferization.writable = false},
- %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
- // CHECK: %[[alloc:.*]] = memref.alloc
- // CHECK: memref.copy %[[t1]], %[[alloc]]
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[alloc]] : {{.*}})
- %r = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d1)>,
- affine_map<(d0, d1)-> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%t3 : tensor<?x?xf32>) {
- ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
- %add = arith.addf %arg0, %arg1 : f32
- linalg.yield %add : f32
- } -> tensor<?x?xf32>
- // CHECK: return %[[alloc]]
- return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_output_cannot_alias_with_input
-// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func.func @linalg_op_output_cannot_alias_with_input(
- %t1: tensor<?x?xf32> {bufferization.writable = true},
- %t2: tensor<?xf32> {bufferization.writable = false},
- %t3: tensor<?x?xf32> {bufferization.writable = true},
- %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
- %r = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
- affine_map<(d0, d1) -> (d1)>,
- affine_map<(d0, d1)-> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%t3 : tensor<?x?xf32>) {
- ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
- %add = arith.addf %arg0, %arg1 : f32
- linalg.yield %add : f32
- } -> tensor<?x?xf32>
- return %r : tensor<?x?xf32>
-}
-
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
deleted file mode 100644
index 1d9d066f45855..0000000000000
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
+++ /dev/null
@@ -1,83 +0,0 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs always-aliasing-with-dest=0" -split-input-file | FileCheck %s
-
-// This is a test case for alwaysAliasingWithDest = 0. In that case, an OpResult
-// may bufferize in-place with an "in" OpOperand or any non-"out" OpOperand.
-
-
-#accesses = [
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>
-]
-#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @linalg_op_same_out_tensors(
-func.func @linalg_op_same_out_tensors(
- %t1: tensor<?xf32> {bufferization.writable = true},
-// CHECK-SAME: bufferization.access = "read-write"
- %t2: tensor<?xf32> {bufferization.writable = true})
-// CHECK-SAME: bufferization.access = "write"
- -> (tensor<?xf32>, tensor<?xf32>){
-
- // %1 and %2 are not used in the computation, so the two OpResults do not
- // necessarily have to bufferize in-place with the two "out" OpOperands. They
- // bufferize in-place with the first and second OpOperand (one of which is an
- // "in" OpOperand).
- // CHECK: linalg.generic
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
- %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
- outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
- ^bb(%0: f32, %1: f32, %2 : f32) :
- linalg.yield %0, %0 : f32, f32
- } -> (tensor<?xf32>, tensor<?xf32>)
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0, 1]
- return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-#accesses = [
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>
-]
-#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
-func.func @linalg_op_same_out_tensors_2(
- %t1: tensor<?xf32> {bufferization.writable = true},
-// CHECK-SAME: bufferization.access = "read-write"
- %t2: tensor<?xf32> {bufferization.writable = true})
-// CHECK-SAME: bufferization.access = "write"
- -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
-
- // %1, %2 and %3 are not used in the computation, so the three OpResults do
- // not necessarily have to bufferize in-place with the three "out" OpOperands.
- // They bufferize in-place with the first, second and third OpOperand (one of
- // which is an "in" OpOperand).
- // In contrast to the previous test case, two of the chosen OpOperands are the
- // same (aliasing) SSA value, which is why one of them must bufferize
- // out-of-place.
- // CHECK: linalg.generic
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
- %o:3 = linalg.generic #trait
- ins(%t1 : tensor<?xf32>)
- outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
- ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
- linalg.yield %0, %0, %0 : f32, f32, f32
- } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
- return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
-}
-