[Mlir-commits] [mlir] 37a1473 - [mlir][bufferize] Allow in-place bufferization for writes to init_tensors in loops

Matthias Springer llvmlistbot at llvm.org
Wed May 4 00:48:16 PDT 2022


Author: Matthias Springer
Date: 2022-05-04T16:43:43+09:00
New Revision: 37a147352457c690a6130a5d20ef381037fc3548

URL: https://github.com/llvm/llvm-project/commit/37a147352457c690a6130a5d20ef381037fc3548
DIFF: https://github.com/llvm/llvm-project/commit/37a147352457c690a6130a5d20ef381037fc3548.diff

LOG: [mlir][bufferize] Allow in-place bufferization for writes to init_tensors in loops

This commit relaxes the rules around ops that define a tensor value but do not specify its contents. (The only such op at the moment is init_tensor.)

When such a tensor is written in a loop, it should not cause out-of-place bufferization.

Differential Revision: https://reviews.llvm.org/D124849
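
As an illustration, the following sketch (a condensed form of the first new test case below; the function and value names are made up for this example) shows the IR shape that this change now allows to bufferize in place: a linalg.init_tensor that is overwritten on every loop iteration and has no other use outside the loop.

    func.func @overwrite_init_tensor_in_loop(
        %A: tensor<?xf32>, %lb: index, %ub: index, %step: index,
        %sz: index, %f: f32) -> tensor<?xf32> {
      // %B has no specified contents, so overwriting it in every iteration
      // creates no read-after-write conflict across iterations.
      %B = linalg.init_tensor [%sz] : tensor<?xf32>
      %r = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
        %B2 = tensor.insert %f into %B[%i] : tensor<?xf32>
        %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
        scf.yield %A2 : tensor<?xf32>
      }
      return %r : tensor<?xf32>
    }

Before this change, the analysis treated %B like any other written alias defined outside of the loop and would conservatively bufferize the tensor.insert out of place.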

Added: 
    

Modified: 
    mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
    mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
index d1fbb70f889c8..593cbf0bb43ab 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
@@ -340,6 +340,19 @@ getCommonEnclosingRepetitiveRegion(ArrayRef<Value> values) {
   return r;
 }
 
+/// Return `true` if the given tensor value is a memory write. Most values are
+/// tensor writes, but ops that define a tensor SSA value without specifying its
+/// contents (e.g., init_tensor) are not.
+static bool isMemoryWrite(Value value, const AnalysisState &state) {
+  auto opResult = value.dyn_cast<OpResult>();
+  if (!opResult)
+    return true;
+  auto bufferizableOp = state.getOptions().dynCastBufferizableOp(value);
+  if (!bufferizableOp)
+    return true;
+  return bufferizableOp.isMemoryWrite(opResult, state);
+}
+
 /// Annotate IR with details about the detected RaW conflict.
 static void annotateConflict(OpOperand *uRead, OpOperand *uConflictingWrite,
                              Value lastWrite) {
@@ -386,10 +399,11 @@ static bool hasReadAfterWriteInterference(
     AnalysisState &state, const BufferizationAliasInfo &aliasInfo) {
   const BufferizationOptions &options = state.getOptions();
 
-  // Gather all written aliases.
+  // Gather all written aliases. Skip over aliases that are not actual writes.
   SmallVector<Value> writtenAliases;
   for (OpOperand *uWrite : usesWrite)
-    writtenAliases.push_back(uWrite->get());
+    if (isMemoryWrite(uWrite->get(), state))
+      writtenAliases.push_back(uWrite->get());
   // Find the inner-most enclosing repetitive region of each alias. If this is
   // the same region for every alias, save it in `repetitiveRegionOfWrites`.
   Optional<Region *> repetitiveRegionOfWrites =
@@ -451,9 +465,14 @@ static bool hasReadAfterWriteInterference(
       // Note: iter_args of loops are not aliases of their respective block
      // arguments, so op dominance can be used when analyzing ops that operate
       // on them.
+      //
+      // Note: If `writtenAliases` is empty, there are no memory writes outside
+      // of the repetitive region of conflictingWritingOp, which means that all
+      // relevant aliases are inside the same repetitive region.
       bool canUseOpDominance =
+          writtenAliases.empty() ||
           repetitiveRegionOfWrites ==
-          getEnclosingRepetitiveRegion(conflictingWritingOp);
+              getEnclosingRepetitiveRegion(conflictingWritingOp);
 
       // No conflict if the readingOp dominates conflictingWritingOp, i.e., the
       // write is not visible when reading.

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
index 8506b6d1c897d..72e5d8cc84caa 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
@@ -1243,3 +1243,62 @@ func.func @write_to_same_tensor_in_loop_out_of_place(
 
   return %r0 : tensor<?xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func @write_to_same_init_tensor_in_place(
+func.func @write_to_same_init_tensor_in_place(
+    %A : tensor<?xf32> {linalg.inplaceable = true},
+    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index)
+  -> (tensor<?xf32>)
+{
+  %B = linalg.init_tensor [%sz2] : tensor<?xf32>
+
+  // CHECK: scf.for {{.*}} {
+  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+    %i2 = arith.index_cast %i : index to i32
+    %i3 = arith.sitofp %i2 : i32 to f32
+    // %B is written multiple times inside a loop, but it is an init_tensor.
+    //      CHECK: tensor.insert
+    // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "true", "none"]}
+    %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
+    //      CHECK: tensor.insert_slice
+    // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
+    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
+    scf.yield %A2 : tensor<?xf32>
+  }
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+
+  return %r0 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @write_to_same_init_tensor_out_of_place(
+func.func @write_to_same_init_tensor_out_of_place(
+    %A : tensor<?xf32> {linalg.inplaceable = true},
+    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32)
+  -> (tensor<?xf32>)
+{
+  %B = linalg.init_tensor [%sz2] : tensor<?xf32>
+  %C = tensor.insert %f into %B[%lb] : tensor<?xf32>
+
+  // CHECK: scf.for {{.*}} {
+  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+    %i2 = arith.index_cast %i : index to i32
+    %i3 = arith.sitofp %i2 : i32 to f32
+    // %C is written multiple times inside the loop. Even though %C aliases an
+    // init_tensor, out-of-place bufferization is necessary because %C is
+    // produced by a memory write (tensor.insert) outside of the loop.
+    //      CHECK: tensor.insert
+    // CHECK-SAME:   {__inplace_operands_attr__ = ["none", "false", "none"]}
+    %B2 = tensor.insert %i3 into %C[%i] : tensor<?xf32>
+    //      CHECK: tensor.insert_slice
+    // CHECK-SAME:   {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
+    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
+    scf.yield %A2 : tensor<?xf32>
+  }
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+
+  return %r0 : tensor<?xf32>
+}
