[Mlir-commits] [mlir] 494505f - [mlir][bufferize][NFC] Move SCF test cases to SCF dialect
Matthias Springer
llvmlistbot at llvm.org
Fri Apr 22 04:39:00 PDT 2022
Author: Matthias Springer
Date: 2022-04-22T20:35:20+09:00
New Revision: 494505f39fec863d5944c5f3b50ced66d6c3a102
URL: https://github.com/llvm/llvm-project/commit/494505f39fec863d5944c5f3b50ced66d6c3a102
DIFF: https://github.com/llvm/llvm-project/commit/494505f39fec863d5944c5f3b50ced66d6c3a102.diff
LOG: [mlir][bufferize][NFC] Move SCF test cases to SCF dialect
Differential Revision: https://reviews.llvm.org/D124249
Added:
mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
mlir/test/Dialect/SCF/one-shot-bufferize.mlir
Modified:
mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
Removed:
################################################################################
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
index 5489b2bad46e4..8506b6d1c897d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
@@ -559,121 +559,6 @@ func.func @nested_extract_slice_and_insert(
return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
}
-//===----------------------------------------------------------------------===//
-// Simple loop cases
-//===----------------------------------------------------------------------===//
-
-// -----
-
-// CHECK-LABEL: func @scf_for_yield_only
-func.func @scf_for_yield_only(
- %A : tensor<?xf32> {bufferization.writable = false},
- %B : tensor<?xf32> {bufferization.writable = true},
- %lb : index,
- %ub : index,
- %step : index)
- -> (tensor<?xf32>, tensor<?xf32>)
-{
- // CHECK: scf.for
- // CHECK-NEXT: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
- %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
- scf.yield %t : tensor<?xf32>
- }
-
- // CHECK: scf.for
- // CHECK-NEXT: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
- %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
- scf.yield %t : tensor<?xf32>
- }
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
- return %r0, %r1: tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
-func.func @scf_for_with_tensor.insert_slice(
- %A : tensor<?xf32> {bufferization.writable = false},
- %B : tensor<?xf32> {bufferization.writable = true},
- %C : tensor<4xf32> {bufferization.writable = false},
- %lb : index,
- %ub : index,
- %step : index)
- -> (tensor<?xf32>, tensor<?xf32>)
-{
- // CHECK: scf.for
- // scf.for bbArgs are always inplaceable seen from ops inside the body:
- // 1. Either the matching tensor is not inplaceable and an alloc occurs
- // which makes bbArg inplaceable.
- // 2. Or it is already inplaceable and so is bbArg.
- // CHECK-NEXT: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
- // CHECK-NEXT: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
- // CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]}
- // CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]}
- %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
- -> (tensor<?xf32>, tensor<?xf32>)
- {
- %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
- %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
- scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
- }
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
- return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-func.func private @some_use(tensor<?xf32>) -> ()
-
-// CHECK-LABEL: func @scf_for_deps
-func.func @scf_for_deps(
- %A : tensor<?xf32> {bufferization.writable = true},
- %B : tensor<?xf32> {bufferization.writable = true},
- %lb : index,
- %ub : index,
- %step : index)
- -> (tensor<?xf32>)
-{
- // %r0 must be out of place because one use of %t in the subsequent production
- // of %r1 is read.
- // CHECK: scf.for
- // CHECK-NEXT: call
- // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
- // CHECK-NEXT: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
- %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
- func.call @some_use(%t) : (tensor<?xf32>) -> ()
- scf.yield %t : tensor<?xf32>
- }
-
- // %r1 bufferizes inplace fine.
- // CHECK: scf.for
- // CHECK-NEXT: call
- // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
- // CHECK-NEXT: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
- %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
- func.call @some_use(%t) : (tensor<?xf32>) -> ()
- scf.yield %t : tensor<?xf32>
- }
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0]
- return %r1: tensor<?xf32>
-}
-
// -----
//===----------------------------------------------------------------------===//
@@ -1148,465 +1033,6 @@ func.func @extract_once_insert_twice(
// -----
-#accesses = [
- affine_map<(i) -> (i)>
-]
-#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @reading_scf_for
-func.func @reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
- %s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
-
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %cst = arith.constant 0.0 : f32
-
- // Write to %t1.
- // CHECK: vector.transfer_write
- // CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"]
- %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
-
- // Read the old value of %t1 inside the loop via an alias.
- // CHECK: scf.for {{.*}} {
- %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"]
- %e = tensor.extract_slice %t2[%s][%s][1] : tensor<?xf32> to tensor<?xf32>
-
- // Read from %t1 via alias %e.
- %v2 = vector.transfer_read %e[%s], %cst : tensor<?xf32>, vector<5xf32>
- scf.yield %t2, %v2 : tensor<?xf32>, vector<5xf32>
- }
- // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]}
-
- // Use %t3 in some way without reading it, so that it does not get DCE'd.
- // CHECK: linalg.generic
- // CHECK-SAME: __inplace_operands_attr__ = ["true"]
- %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
- ^bb(%0: f32) :
- linalg.yield %cst : f32
- } -> (tensor<?xf32>)
-
- return %o, %v3 : tensor<?xf32>, vector<5xf32>
-}
-
-// -----
-
-#accesses = [
- affine_map<(i) -> (i)>
-]
-#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @non_reading_scf_for
-func.func @non_reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
- %s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
-
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %cst = arith.constant 0.0 : f32
-
- // Write to %t1.
- // CHECK: vector.transfer_write
- // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
- %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
-
- // This loop does not read from %t1. It only writes to it.
- // CHECK: scf.for
- %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
- // Write to %t1 via %t2. (Overwrite %t3.)
- // CHECK: linalg.generic
- // CHECK-SAME: __inplace_operands_attr__ = ["true"]
- %o2 = linalg.generic #trait outs (%t2 : tensor<?xf32>) {
- ^bb(%0: f32) :
- linalg.yield %cst : f32
- } -> (tensor<?xf32>)
-
- // Read overwritten value. This is not a read of %t1.
- %v2 = vector.transfer_read %o2[%s], %cst : tensor<?xf32>, vector<5xf32>
- scf.yield %o2, %v2 : tensor<?xf32>, vector<5xf32>
- }
-
- // Use %t3 in some way without reading it, so that it does not get DCE'd.
- // CHECK: linalg.generic
- // CHECK-SAME: __inplace_operands_attr__ = ["true"]
- %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
- ^bb(%0: f32) :
- linalg.yield %cst : f32
- } -> (tensor<?xf32>)
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0, -1]
- return %o, %v3 : tensor<?xf32>, vector<5xf32>
-}
-
-// -----
-
-//===----------------------------------------------------------------------===//
-// scf.if cases
-//===----------------------------------------------------------------------===//
-
-// This example passes analysis, but it fails when bufferizing.
-// CHECK-LABEL: func @scf_if_inplace1
-func.func @scf_if_inplace1(%t1: tensor<?xf32> {bufferization.writable = true},
- %t2: tensor<?xf32> {bufferization.writable = true},
- %cond: i1) -> tensor<?xf32> {
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t1 : tensor<?xf32>
- } else {
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t2 : tensor<?xf32>
- }
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_inplace2
-func.func @scf_if_inplace2(%t1: tensor<?xf32> {bufferization.writable = true},
- %v: vector<5xf32>, %idx: index,
- %cond: i1) -> tensor<?xf32> {
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t1 : tensor<?xf32>
- } else {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- scf.yield %t2 : tensor<?xf32>
- }
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0]
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_inplace3
-func.func @scf_if_inplace3(%t1: tensor<?xf32> {bufferization.writable = true},
- %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
- %cond: i1) -> tensor<?xf32> {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
- %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t2 : tensor<?xf32>
- } else {
- // Writing the same tensor through an alias. This is OK.
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t3 : tensor<?xf32>
- }
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_in_place4
-func.func @scf_if_in_place4(%t1: tensor<?xf32> {bufferization.writable = true},
- %v: vector<5xf32>, %idx: index,
- %cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
- %cst = arith.constant 0.0 : f32
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t1 : tensor<?xf32>
- } else {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t2 : tensor<?xf32>
- }
- %r_alias = scf.if %cond2 -> (tensor<?xf32>) {
- // Reading %r is OK. No conflict.
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %r : tensor<?xf32>
- } else {
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %r : tensor<?xf32>
- }
- %v2 = vector.transfer_read %r_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0, -1]
- return %r_alias, %v2 : tensor<?xf32>, vector<10xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_inplace5
-func.func @scf_if_inplace5(%t1: tensor<?xf32> {bufferization.writable = true},
- %idx: index, %cond: i1) -> tensor<?xf32> {
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
- %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %e : tensor<?xf32>
- } else {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
- %f = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %f : tensor<?xf32>
- }
-
- // Inserting into an equivalent tensor at the same offset. This bufferizes
- // inplace.
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
- %r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0]
- return %r2 : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_inplace6
-func.func @scf_if_inplace6(%t1: tensor<?xf32> {bufferization.writable = true},
- %v1: vector<5xf32>, %v2: vector<5xf32>,
- %v3: vector<5xf32>, %idx: index,
- %cond: i1, %cond2: i1) -> tensor<?xf32> {
- // Test nested scf.if ops.
- %r = scf.if %cond -> (tensor<?xf32>) {
- %t2 = scf.if %cond2 -> (tensor<?xf32>) {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t3 : tensor<?xf32>
- } else {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t4 : tensor<?xf32>
- }
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t2 : tensor<?xf32>
- } else {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t3 : tensor<?xf32>
- }
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0]
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_inplace7
-func.func @scf_if_inplace7(%t1: tensor<?xf32> {bufferization.writable = true},
- %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
- %idx2: index, %cond: i1) -> (tensor<?xf32>, vector<5xf32>) {
- %cst = arith.constant 0.0 : f32
- %r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
- %t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
- scf.yield %t2, %v1 : tensor<?xf32>, vector<5xf32>
- } else {
- // Writing the same tensor through an alias.
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
- %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // Read the original value of %t1. This requires the write in this branch
- // to be out-of-place. But the write in the other branch can still be
- // inplace.
- %v_r = vector.transfer_read %t1[%idx2], %cst : tensor<?xf32>, vector<5xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
- scf.yield %t3, %v_r : tensor<?xf32>, vector<5xf32>
- }
- return %r, %v_r2 : tensor<?xf32>, vector<5xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_out_of_place1a
-func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {bufferization.writable = true},
- %idx: index, %idx2: index,
- %cond: i1) -> tensor<?xf32> {
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
- %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %e : tensor<?xf32>
- } else {
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t1 : tensor<?xf32>
- }
-
- // Reading from and writing to the same tensor via different args. This is a
- // conflict.
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]
- %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
- return %r2 : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_out_of_place1b
-func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {bufferization.writable = true},
- %idx: index, %idx2: index, %idx3: index,
- %cond: i1) -> tensor<?xf32> {
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
- %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %e : tensor<?xf32>
- } else {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
- %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %f : tensor<?xf32>
- }
-
- // Reading from and writing to the same tensor via different args. This is a
- // conflict. In contrast to scf_if_out_of_place1a, the fact that %r aliases
- // with %t1 is only detected when analyzing the tensor.extract_slices. That's
- // why the tensor.insert_slice is inplace and the two extract_slices are
- // out-of-place.
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
- %r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0]
- return %r2 : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_out_of_place1c
-func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {bufferization.writable = true},
- %idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
- %r = scf.if %cond -> (tensor<?xf32>) {
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
- %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %e : tensor<?xf32>
- } else {
- // TODO: This one could bufferize inplace, but the analysis is too restrictive.
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
- %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %f : tensor<?xf32>
- }
-
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
- %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0]
- return %r2 : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_out_of_place2
-func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {bufferization.writable = true},
- %v: vector<5xf32>, %idx: index,
- %cond: i1) -> (tensor<?xf32>, vector<10xf32>) {
- %cst = arith.constant 0.0 : f32
- %r = scf.if %cond -> (tensor<?xf32>) {
- scf.yield %t1 : tensor<?xf32>
- } else {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
- %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t2 : tensor<?xf32>
- }
-
- // Read the old value of %t1. Forces the transfer_write to bufferize
- // out-of-place.
- %v2 = vector.transfer_read %t1[%idx], %cst : tensor<?xf32>, vector<10xf32>
- return %r, %v2 : tensor<?xf32>, vector<10xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_out_of_place3
-func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {bufferization.writable = true},
- %v: vector<5xf32>, %idx: index,
- %cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
- %cst = arith.constant 0.0 : f32
- %r = scf.if %cond -> (tensor<?xf32>) {
- scf.yield %t1 : tensor<?xf32>
- } else {
- // CHECK: vector.transfer_write
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
- %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t2 : tensor<?xf32>
- }
- %t1_alias = scf.if %cond2 -> (tensor<?xf32>) {
- // scf.yield bufferizes to a read. That is a conflict in this example.
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t1 : tensor<?xf32>
- } else {
- // CHECK: scf.yield
- // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
- scf.yield %t1 : tensor<?xf32>
- }
- %v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
- return %r, %v2 : tensor<?xf32>, vector<10xf32>
-}
-
-// -----
-
// CHECK-LABEL: func @some_use
func.func @some_use(%A : tensor<?xf32> {bufferization.writable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
@@ -1817,30 +1243,3 @@ func.func @write_to_same_tensor_in_loop_out_of_place(
return %r0 : tensor<?xf32>
}
-
-// -----
-
-// CHECK-LABEL: func @write_to_same_tensor_in_loop_in_place(
-func.func @write_to_same_tensor_in_loop_in_place(
- %A : tensor<?xf32> {linalg.inplaceable = true},
- %lb : index, %ub : index, %step : index, %sz: index)
- -> (tensor<?xf32>)
-{
- // CHECK: scf.for {{.*}} {
- %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
- %B = linalg.init_tensor [%sz] : tensor<?xf32>
- %i2 = arith.index_cast %i : index to i32
- %i3 = arith.sitofp %i2 : i32 to f32
- // The tensor.insert is in-place because the %B is defined inside the loop.
- // CHECK: tensor.insert
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
- %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
- %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
- scf.yield %A2 : tensor<?xf32>
- }
- // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
-
- return %r0 : tensor<?xf32>
-}
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 2bf8a2d5a8921..451ad9c174ad0 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -286,142 +286,12 @@ func.func @insert_slice_fun_not_inplace(
return %r0: tensor<?xf32>
}
-//===----------------------------------------------------------------------===//
-// Simple loop cases
-//===----------------------------------------------------------------------===//
-
-// -----
-
-// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @scf_for_yield_only(
-// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
-// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
-// CHECK-SAME: ) -> memref<?xf32> {
-func.func @scf_for_yield_only(
- %A : tensor<?xf32> {bufferization.writable = false},
- %B : tensor<?xf32> {bufferization.writable = true},
- %lb : index, %ub : index, %step : index)
- -> (tensor<?xf32>, tensor<?xf32>)
-{
- // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
- // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
-
- // The first scf.for remains but just turns into dead code.
- %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
- scf.yield %t : tensor<?xf32>
- }
-
- // The second scf.for remains but just turns into dead code.
- %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
- scf.yield %t : tensor<?xf32>
- }
-
- // CHECK: return %[[ALLOC_FOR_A]] : memref<?xf32>
- // CHECK-NOT: dealloc
- return %r0, %r1: tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-// Ensure that the function bufferizes without error. This tests pre-order
-// traversal of scf.for loops during bufferization. No need to check the IR,
-// just want to make sure that it does not crash.
-
-// CHECK-LABEL: func @nested_scf_for
-func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true},
- %v : vector<5xf32>) -> tensor<?xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c10 = arith.constant 10 : index
- %r1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%B = %A) -> tensor<?xf32> {
- %r2 = scf.for %j = %c0 to %c10 step %c1 iter_args(%C = %B) -> tensor<?xf32> {
- %w = vector.transfer_write %v, %C[%c0] : vector<5xf32>, tensor<?xf32>
- scf.yield %w : tensor<?xf32>
- }
- scf.yield %r2 : tensor<?xf32>
- }
- return %r1 : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
-// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
-// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
-// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
-func.func @scf_for_with_tensor.insert_slice(
- %A : tensor<?xf32> {bufferization.writable = false},
- %B : tensor<?xf32> {bufferization.writable = true},
- %C : tensor<4xf32> {bufferization.writable = false},
- %lb : index, %ub : index, %step : index)
- -> (tensor<?xf32>, tensor<?xf32>)
-{
- // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
- // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
-
- // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1]
- // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1]
-
- // CHECK: scf.for {{.*}}
- // CHECK-NOT: iter_args
- %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
- -> (tensor<?xf32>, tensor<?xf32>)
- {
- // %ttA bufferizes to direct copy of %BUFFER_CAST_C into %svA
- // CHECK: memref.copy %[[C]], %[[svA]]
- %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // %ttB bufferizes to direct copy of %BUFFER_CAST_C into %BUFFER_CAST_B
- // CHECK: memref.copy %[[C]], %[[svB]]
- %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // CHECK-NOT: scf.yield
- scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
- }
-
- // CHECK: return %[[ALLOC_FOR_A]] : memref<?xf32>
- return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
-}
-
// -----
//===----------------------------------------------------------------------===//
// Cross function boundary cases.
//===----------------------------------------------------------------------===//
-// CHECK-LABEL: func @execute_region_with_conflict(
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32
-func.func @execute_region_with_conflict(
- %t1 : tensor<?xf32> {bufferization.writable = "true"})
- -> (f32, tensor<?xf32>, f32)
-{
- %f1 = arith.constant 0.0 : f32
- %idx = arith.constant 7 : index
-
- // scf.execute_region is canonicalized away after bufferization. So just the
- // memref.store is left over.
-
- // CHECK: %[[alloc:.*]] = memref.alloc
- // CHECK: memref.copy %[[m1]], %[[alloc]]
- // CHECK: memref.store %{{.*}}, %[[alloc]][%{{.*}}]
- %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
- %t2 = tensor.insert %f1 into %t1[%idx] : tensor<?xf32>
- scf.yield %f1, %t2, %f1 : f32, tensor<?xf32>, f32
- }
-
- // CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
- // CHECK: %[[load:.*]] = memref.load %[[m1]]
- %3 = tensor.extract %t1[%idx] : tensor<?xf32>
-
- // CHECK: return %{{.*}}, %[[casted]], %[[load]] : f32, memref<?xf32, #{{.*}}>, f32
- return %0, %1, %3 : f32, tensor<?xf32>, f32
-}
-
-// -----
-
// CHECK: func @matmul(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<128x256xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<256x192xf32>
@@ -536,80 +406,6 @@ func.func @dominance_violation_bug_1(
return %rA : tensor<?x?xf32>
}
-// -----
-
-// CHECK-LABEL: func @scf_if_inplace(
-// CHECK-SAME: %[[cond:.*]]: i1, %[[t1:.*]]: memref<?xf32{{.*}}>, %[[v:.*]]: vector
-func.func @scf_if_inplace(%cond: i1,
- %t1: tensor<?xf32> {bufferization.writable = true},
- %v: vector<5xf32>, %idx: index) -> tensor<?xf32> {
-
- // CHECK: scf.if %[[cond]] {
- // CHECK-NEXT: } else {
- // CHECK-NEXT: vector.transfer_write %[[v]], %[[t1]]
- // CHECK-NEXT: }
- // CHECK-NEXT: return
- %r = scf.if %cond -> (tensor<?xf32>) {
- scf.yield %t1 : tensor<?xf32>
- } else {
- %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
- scf.yield %t2 : tensor<?xf32>
- }
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_inside_scf_for
-// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index
-// CHECK: scf.for %{{.*}} = %[[c0]] to %[[c10]] step %[[c1]] {
-// CHECK: scf.if %{{.*}} {
-// CHECK: } else {
-// CHECK: vector.transfer_write
-// CHECK: }
-// CHECK: }
-func.func @scf_if_inside_scf_for(
- %t1: tensor<?xf32> {bufferization.writable = true},
- %v: vector<5xf32>, %idx: index,
- %cond: i1)
- -> tensor<?xf32>
-{
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c10 = arith.constant 10 : index
- %r = scf.for %iv = %c0 to %c10 step %c1 iter_args(%bb = %t1) -> (tensor<?xf32>) {
- %r2 = scf.if %cond -> (tensor<?xf32>) {
- scf.yield %bb : tensor<?xf32>
- } else {
- %t2 = vector.transfer_write %v, %bb[%idx] : vector<5xf32>, tensor<?xf32>
- scf.yield %t2 : tensor<?xf32>
- }
- scf.yield %r2 : tensor<?xf32>
- }
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @scf_if_non_equiv_yields(
-// CHECK-SAME: %[[cond:.*]]: i1, %[[A:.*]]: memref<{{.*}}>, %[[B:.*]]: memref<{{.*}}>) -> memref<{{.*}}>
-func.func @scf_if_non_equiv_yields(
- %b : i1,
- %A : tensor<4xf32> {bufferization.writable = false},
- %B : tensor<4xf32> {bufferization.writable = false})
- -> tensor<4xf32>
-{
- // CHECK: %[[r:.*]] = arith.select %[[cond]], %[[A]], %[[B]]
- %r = scf.if %b -> (tensor<4xf32>) {
- scf.yield %A : tensor<4xf32>
- } else {
- scf.yield %B : tensor<4xf32>
- }
- // CHECK: return %[[r]]
- return %r: tensor<4xf32>
-}
// -----
@@ -823,126 +619,3 @@ func.func @rank_reducing(
}
return %5: tensor<?x1x6x8xf32>
}
-
-// -----
-
-// Note: This bufferization is inefficient, but it bufferizes correctly.
-
-// CHECK-LABEL: func @scf_execute_region_yield_non_equivalent(
-// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}})
-// CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]]
-// CHECK: memref.dealloc %[[alloc]]
-// CHECK: %[[r:.*]] = memref.load %[[clone]][%{{.*}}]
-// CHECK: memref.dealloc %[[clone]]
-// CHECK: return %[[r]]
-func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 {
- %r = scf.execute_region -> (tensor<?xf32>) {
- %t2 = linalg.init_tensor [%i] : tensor<?xf32>
- scf.yield %t2 : tensor<?xf32>
- }
- %f = tensor.extract %r[%j] : tensor<?xf32>
- return %f : f32
-}
-
-// -----
-
-// Note: This bufferizes to inefficient code, but bufferization should not see
-// such IR in the first place. The iter_arg would canonicalize away. This test
-// case is just to ensure that the bufferization generates correct code.
-
-// CHECK-LABEL: func @scf_for_yield_non_equivalent(
-// CHECK-SAME: %[[t:.*]]: memref<?xf32
-// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}})
-// CHECK: %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[alloc]])
-// CHECK: memref.dealloc %[[iter]]
-// CHECK: %[[alloc2:.*]] = memref.alloc(%{{.*}})
-// CHECK: memref.copy %[[t]], %[[alloc2]]
-// CHECK: scf.yield %[[alloc2]]
-// CHECK: return %[[for]]
-func.func @scf_for_yield_non_equivalent(
- %t: tensor<?xf32>, %lb : index, %ub : index, %step : index) -> tensor<?xf32> {
- %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor<?xf32> {
- scf.yield %t : tensor<?xf32>
- }
-
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// Note: This bufferizes to inefficient code, but bufferization should not see
-// such IR in the first place. The iter_arg would canonicalize away. This test
-// case is just to ensure that the bufferization generates correct code.
-
-// CHECK-LABEL: func @scf_for_yield_allocation(
-// CHECK-SAME: %[[t:.*]]: memref<?xf32
-// CHECK: %[[cloned:.*]] = bufferization.clone %[[t]]
-// CHECK: %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[cloned]])
-// This alloc is for the linalg.init_tensor.
-// CHECK-DAG: %[[alloc2:.*]] = memref.alloc(%{{.*}})
-// CHECK-DAG: memref.dealloc %[[iter]]
-// This alloc is for the scf.yield.
-// CHECK: %[[alloc3:.*]] = memref.alloc(%{{.*}})
-// CHECK: memref.copy %[[alloc2]], %[[alloc3]]
-// CHECK: memref.dealloc %[[alloc2]]
-// CHECK: %[[casted3:.*]] = memref.cast %[[alloc3]]
-// CHECK: scf.yield %[[casted3]]
-// CHECK: return %[[for]]
-func.func @scf_for_yield_allocation(%t: tensor<?xf32>, %lb : index, %ub : index,
- %step : index) -> tensor<?xf32> {
- %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor<?xf32> {
- %t2 = linalg.init_tensor [%i] : tensor<?xf32>
- scf.yield %t2 : tensor<?xf32>
- }
-
- return %r : tensor<?xf32>
-}
-
-// -----
-
-// TODO: The scf.yield could bufferize to 1 alloc and 2 copies (instead of
-// 2 allocs and 2 copies).
-
-// CHECK-LABEL: func @scf_for_swapping_yields(
-// CHECK-SAME: %[[A:.*]]: memref<?xf32, #{{.*}}>, %[[B:.*]]: memref<?xf32, #{{.*}}>
-func.func @scf_for_swapping_yields(
- %A : tensor<?xf32>, %B : tensor<?xf32> {bufferization.writable = true},
- %C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
- -> (f32, f32)
-{
-// CHECK-DAG: %[[clone1:.*]] = bufferization.clone %[[A]]
-// CHECK-DAG: %[[clone2:.*]] = bufferization.clone %[[B]]
-// CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[clone1]], %[[iter2:.*]] = %[[clone2]])
- %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
- -> (tensor<?xf32>, tensor<?xf32>)
- {
-// CHECK: %[[sv1:.*]] = memref.subview %[[iter1]]
-// CHECK: memref.copy %{{.*}}, %[[sv1]]
- %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
-// CHECK: %[[sv2:.*]] = memref.subview %[[iter2]]
-// CHECK: memref.copy %{{.*}}, %[[sv2]]
- %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
-// CHECK: %[[alloc2:.*]] = memref.alloc(%{{.*}})
-// CHECK: memref.copy %[[iter2]], %[[alloc2]]
-// CHECK: memref.dealloc %[[iter2]]
-// CHECK: %[[alloc1:.*]] = memref.alloc(%{{.*}})
-// CHECK: memref.copy %[[iter1]], %[[alloc1]]
-// CHECK: memref.dealloc %[[iter1]]
-// CHECK: %[[casted1:.*]] = memref.cast %[[alloc1]]
-// CHECK: %[[casted2:.*]] = memref.cast %[[alloc2]]
-// CHECK: scf.yield %[[casted2]], %[[casted1]]
- // Yield tensors in different order.
- scf.yield %ttB, %ttA : tensor<?xf32>, tensor<?xf32>
- }
-
-// CHECK: %[[r0:.*]] = memref.load %[[for]]#0
-// CHECK: memref.dealloc %[[for]]#0
-// CHECK: %[[r1:.*]] = memref.load %[[for]]#1
-// CHECK: memref.dealloc %[[for]]#1
- %f0 = tensor.extract %r0#0[%step] : tensor<?xf32>
- %f1 = tensor.extract %r0#1[%step] : tensor<?xf32>
-// CHECK: return %[[r0]], %[[r1]]
- return %f0, %f1: f32, f32
-}
-
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
new file mode 100644
index 0000000000000..10e6b289f8ed0
--- /dev/null
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
@@ -0,0 +1,601 @@
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
+
+// Run fuzzer with different seeds.
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+
+// CHECK-LABEL: func @scf_for_yield_only
+func.func @scf_for_yield_only(
+ %A : tensor<?xf32> {bufferization.writable = false},
+ %B : tensor<?xf32> {bufferization.writable = true},
+ %lb : index,
+ %ub : index,
+ %step : index)
+ -> (tensor<?xf32>, tensor<?xf32>)
+{
+ // CHECK: scf.for
+ // CHECK-NEXT: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ scf.yield %t : tensor<?xf32>
+ }
+
+ // CHECK: scf.for
+ // CHECK-NEXT: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+ %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
+ scf.yield %t : tensor<?xf32>
+ }
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
+ return %r0, %r1: tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
+func.func @scf_for_with_tensor.insert_slice(
+ %A : tensor<?xf32> {bufferization.writable = false},
+ %B : tensor<?xf32> {bufferization.writable = true},
+ %C : tensor<4xf32> {bufferization.writable = false},
+ %lb : index,
+ %ub : index,
+ %step : index)
+ -> (tensor<?xf32>, tensor<?xf32>)
+{
+ // CHECK: scf.for
+ // scf.for bbArgs are always inplaceable seen from ops inside the body:
+ // 1. Either the matching tensor is not inplaceable and an alloc occurs
+ // which makes bbArg inplaceable.
+ // 2. Or it is already inplaceable and so is bbArg.
+ // CHECK-NEXT: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
+ // CHECK-NEXT: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
+ // CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]}
+ // CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]}
+ %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
+ -> (tensor<?xf32>, tensor<?xf32>)
+ {
+ %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
+ %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
+ scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
+ }
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
+ return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+func.func private @some_use(tensor<?xf32>) -> ()
+
+// CHECK-LABEL: func @scf_for_deps
+func.func @scf_for_deps(
+ %A : tensor<?xf32> {bufferization.writable = true},
+ %B : tensor<?xf32> {bufferization.writable = true},
+ %lb : index,
+ %ub : index,
+ %step : index)
+ -> (tensor<?xf32>)
+{
+ // %r0 must be out of place because one use of %t in the subsequent production
+ // of %r1 is read.
+ // CHECK: scf.for
+ // CHECK-NEXT: call
+ // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
+ // CHECK-NEXT: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ func.call @some_use(%t) : (tensor<?xf32>) -> ()
+ scf.yield %t : tensor<?xf32>
+ }
+
+ // %r1 bufferizes inplace fine.
+ // CHECK: scf.for
+ // CHECK-NEXT: call
+ // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
+ // CHECK-NEXT: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+ %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ func.call @some_use(%t) : (tensor<?xf32>) -> ()
+ scf.yield %t : tensor<?xf32>
+ }
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0]
+ return %r1: tensor<?xf32>
+}
+
+// -----
+
+#accesses = [
+ affine_map<(i) -> (i)>
+]
+#trait = {
+ indexing_maps = #accesses,
+ iterator_types = ["parallel"]
+}
+
+// CHECK-LABEL: func @reading_scf_for
+func.func @reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
+ %s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
+
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %cst = arith.constant 0.0 : f32
+
+ // Write to %t1.
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"]
+ %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
+
+ // Read the old value of %t1 inside the loop via an alias.
+ // CHECK: scf.for {{.*}} {
+ %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"]
+ %e = tensor.extract_slice %t2[%s][%s][1] : tensor<?xf32> to tensor<?xf32>
+
+ // Read from %t1 via alias %e.
+ %v2 = vector.transfer_read %e[%s], %cst : tensor<?xf32>, vector<5xf32>
+ scf.yield %t2, %v2 : tensor<?xf32>, vector<5xf32>
+ }
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]}
+
+ // Use %t3 in some way without reading it, so that it does not get DCE'd.
+ // CHECK: linalg.generic
+ // CHECK-SAME: __inplace_operands_attr__ = ["true"]
+ %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
+ ^bb(%0: f32) :
+ linalg.yield %cst : f32
+ } -> (tensor<?xf32>)
+
+ return %o, %v3 : tensor<?xf32>, vector<5xf32>
+}
+
+// -----
+
+#accesses = [
+ affine_map<(i) -> (i)>
+]
+#trait = {
+ indexing_maps = #accesses,
+ iterator_types = ["parallel"]
+}
+
+// CHECK-LABEL: func @non_reading_scf_for
+func.func @non_reading_scf_for(%t1: tensor<?xf32> {bufferization.writable = true},
+ %s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
+
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %cst = arith.constant 0.0 : f32
+
+ // Write to %t1.
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
+ %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
+
+ // This loop does not read from %t1. It only writes to it.
+ // CHECK: scf.for
+ %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
+ // Write to %t1 via %t2. (Overwrite %t3.)
+ // CHECK: linalg.generic
+ // CHECK-SAME: __inplace_operands_attr__ = ["true"]
+ %o2 = linalg.generic #trait outs (%t2 : tensor<?xf32>) {
+ ^bb(%0: f32) :
+ linalg.yield %cst : f32
+ } -> (tensor<?xf32>)
+
+ // Read overwritten value. This is not a read of %t1.
+ %v2 = vector.transfer_read %o2[%s], %cst : tensor<?xf32>, vector<5xf32>
+ scf.yield %o2, %v2 : tensor<?xf32>, vector<5xf32>
+ }
+
+ // Use %t3 in some way without reading it, so that it does not get DCE'd.
+ // CHECK: linalg.generic
+ // CHECK-SAME: __inplace_operands_attr__ = ["true"]
+ %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
+ ^bb(%0: f32) :
+ linalg.yield %cst : f32
+ } -> (tensor<?xf32>)
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0, -1]
+ return %o, %v3 : tensor<?xf32>, vector<5xf32>
+}
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// scf.if cases
+//===----------------------------------------------------------------------===//
+
+// This example passes analysis, but it fails when bufferizing.
+// CHECK-LABEL: func @scf_if_inplace1
+func.func @scf_if_inplace1(%t1: tensor<?xf32> {bufferization.writable = true},
+ %t2: tensor<?xf32> {bufferization.writable = true},
+ %cond: i1) -> tensor<?xf32> {
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t1 : tensor<?xf32>
+ } else {
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t2 : tensor<?xf32>
+ }
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inplace2
+func.func @scf_if_inplace2(%t1: tensor<?xf32> {bufferization.writable = true},
+ %v: vector<5xf32>, %idx: index,
+ %cond: i1) -> tensor<?xf32> {
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t1 : tensor<?xf32>
+ } else {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ scf.yield %t2 : tensor<?xf32>
+ }
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0]
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inplace3
+func.func @scf_if_inplace3(%t1: tensor<?xf32> {bufferization.writable = true},
+ %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
+ %cond: i1) -> tensor<?xf32> {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
+ %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t2 : tensor<?xf32>
+ } else {
+ // Writing the same tensor through an alias. This is OK.
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t3 : tensor<?xf32>
+ }
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_in_place4
+func.func @scf_if_in_place4(%t1: tensor<?xf32> {bufferization.writable = true},
+ %v: vector<5xf32>, %idx: index,
+ %cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
+ %cst = arith.constant 0.0 : f32
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t1 : tensor<?xf32>
+ } else {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t2 : tensor<?xf32>
+ }
+ %r_alias = scf.if %cond2 -> (tensor<?xf32>) {
+ // Reading %r is OK. No conflict.
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %r : tensor<?xf32>
+ } else {
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %r : tensor<?xf32>
+ }
+ %v2 = vector.transfer_read %r_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0, -1]
+ return %r_alias, %v2 : tensor<?xf32>, vector<10xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inplace5
+func.func @scf_if_inplace5(%t1: tensor<?xf32> {bufferization.writable = true},
+ %idx: index, %cond: i1) -> tensor<?xf32> {
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
+ %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %e : tensor<?xf32>
+ } else {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
+ %f = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %f : tensor<?xf32>
+ }
+
+ // Inserting into an equivalent tensor at the same offset. This bufferizes
+ // inplace.
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
+ %r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0]
+ return %r2 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inplace6
+func.func @scf_if_inplace6(%t1: tensor<?xf32> {bufferization.writable = true},
+ %v1: vector<5xf32>, %v2: vector<5xf32>,
+ %v3: vector<5xf32>, %idx: index,
+ %cond: i1, %cond2: i1) -> tensor<?xf32> {
+ // Test nested scf.if ops.
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ %t2 = scf.if %cond2 -> (tensor<?xf32>) {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t3 : tensor<?xf32>
+ } else {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t4 : tensor<?xf32>
+ }
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t2 : tensor<?xf32>
+ } else {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t3 : tensor<?xf32>
+ }
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0]
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inplace7
+func.func @scf_if_inplace7(%t1: tensor<?xf32> {bufferization.writable = true},
+ %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
+ %idx2: index, %cond: i1) -> (tensor<?xf32>, vector<5xf32>) {
+ %cst = arith.constant 0.0 : f32
+ %r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
+ %t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
+ scf.yield %t2, %v1 : tensor<?xf32>, vector<5xf32>
+ } else {
+ // Writing the same tensor through an alias.
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
+ %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // Read the original value of %t1. This requires the write in this branch
+ // to be out-of-place. But the write in the other branch can still be
+ // inplace.
+ %v_r = vector.transfer_read %t1[%idx2], %cst : tensor<?xf32>, vector<5xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
+ scf.yield %t3, %v_r : tensor<?xf32>, vector<5xf32>
+ }
+ return %r, %v_r2 : tensor<?xf32>, vector<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_out_of_place1a
+func.func @scf_if_out_of_place1a(%t1: tensor<?xf32> {bufferization.writable = true},
+ %idx: index, %idx2: index,
+ %cond: i1) -> tensor<?xf32> {
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
+ %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %e : tensor<?xf32>
+ } else {
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t1 : tensor<?xf32>
+ }
+
+ // Reading from and writing to the same tensor via different args. This is a
+ // conflict.
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]
+ %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
+ return %r2 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_out_of_place1b
+func.func @scf_if_out_of_place1b(%t1: tensor<?xf32> {bufferization.writable = true},
+ %idx: index, %idx2: index, %idx3: index,
+ %cond: i1) -> tensor<?xf32> {
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
+ %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %e : tensor<?xf32>
+ } else {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
+ %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %f : tensor<?xf32>
+ }
+
+ // Reading from and writing to the same tensor via different args. This is a
+ // conflict. In contrast to scf_if_out_of_place1a, the fact that %r aliases
+ // with %t1 is only detected when analyzing the tensor.extract_slices. That's
+ // why the tensor.insert_slice is inplace and the two extract_slices are
+ // out-of-place.
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
+ %r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0]
+ return %r2 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_out_of_place1c
+func.func @scf_if_out_of_place1c(%t1: tensor<?xf32> {bufferization.writable = true},
+ %idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
+ %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %e : tensor<?xf32>
+ } else {
+ // TODO: This one could bufferize inplace, but the analysis is too restrictive.
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
+ %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %f : tensor<?xf32>
+ }
+
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
+ %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0]
+ return %r2 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_out_of_place2
+func.func @scf_if_out_of_place2(%t1: tensor<?xf32> {bufferization.writable = true},
+ %v: vector<5xf32>, %idx: index,
+ %cond: i1) -> (tensor<?xf32>, vector<10xf32>) {
+ %cst = arith.constant 0.0 : f32
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ scf.yield %t1 : tensor<?xf32>
+ } else {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
+ %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t2 : tensor<?xf32>
+ }
+
+ // Read the old value of %t1. Forces the transfer_write to bufferize
+ // out-of-place.
+ %v2 = vector.transfer_read %t1[%idx], %cst : tensor<?xf32>, vector<10xf32>
+ return %r, %v2 : tensor<?xf32>, vector<10xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_out_of_place3
+func.func @scf_if_out_of_place3(%t1: tensor<?xf32> {bufferization.writable = true},
+ %v: vector<5xf32>, %idx: index,
+ %cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
+ %cst = arith.constant 0.0 : f32
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ scf.yield %t1 : tensor<?xf32>
+ } else {
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
+ %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t2 : tensor<?xf32>
+ }
+ %t1_alias = scf.if %cond2 -> (tensor<?xf32>) {
+ // scf.yield bufferizes to a read. That is a conflict in this example.
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t1 : tensor<?xf32>
+ } else {
+ // CHECK: scf.yield
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+ scf.yield %t1 : tensor<?xf32>
+ }
+ %v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
+ return %r, %v2 : tensor<?xf32>, vector<10xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @write_to_same_tensor_in_loop_in_place(
+func.func @write_to_same_tensor_in_loop_in_place(
+ %A : tensor<?xf32> {linalg.inplaceable = true},
+ %lb : index, %ub : index, %step : index, %sz: index)
+ -> (tensor<?xf32>)
+{
+ // CHECK: scf.for {{.*}} {
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ %B = linalg.init_tensor [%sz] : tensor<?xf32>
+ %i2 = arith.index_cast %i : index to i32
+ %i3 = arith.sitofp %i2 : i32 to f32
+ // The tensor.insert is in-place because the %B is defined inside the loop.
+ // CHECK: tensor.insert
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
+ %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
+ %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
+ scf.yield %A2 : tensor<?xf32>
+ }
+ // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
+
+ return %r0 : tensor<?xf32>
+}
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
new file mode 100644
index 0000000000000..1b6fd99147970
--- /dev/null
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -0,0 +1,330 @@
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s
+
+// Run fuzzer with different seeds.
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
+
+// Test bufferization using memref types that have no layout map.
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs fully-dynamic-layout-maps=0 bufferize-function-boundaries" -split-input-file -o /dev/null
+
+// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @scf_for_yield_only(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
+// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
+// CHECK-SAME: ) -> memref<?xf32> {
+func.func @scf_for_yield_only(
+ %A : tensor<?xf32> {bufferization.writable = false},
+ %B : tensor<?xf32> {bufferization.writable = true},
+ %lb : index, %ub : index, %step : index)
+ -> (tensor<?xf32>, tensor<?xf32>)
+{
+ // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
+ // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
+
+ // The first scf.for remains but just turns into dead code.
+ %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
+ scf.yield %t : tensor<?xf32>
+ }
+
+ // The second scf.for remains but just turns into dead code.
+ %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
+ scf.yield %t : tensor<?xf32>
+ }
+
+ // CHECK: return %[[ALLOC_FOR_A]] : memref<?xf32>
+ // CHECK-NOT: dealloc
+ return %r0, %r1: tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// Ensure that the function bufferizes without error. This tests pre-order
+// traversal of scf.for loops during bufferization. No need to check the IR,
+// just want to make sure that it does not crash.
+
+// CHECK-LABEL: func @nested_scf_for
+func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true},
+ %v : vector<5xf32>) -> tensor<?xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %r1 = scf.for %i = %c0 to %c10 step %c1 iter_args(%B = %A) -> tensor<?xf32> {
+ %r2 = scf.for %j = %c0 to %c10 step %c1 iter_args(%C = %B) -> tensor<?xf32> {
+ %w = vector.transfer_write %v, %C[%c0] : vector<5xf32>, tensor<?xf32>
+ scf.yield %w : tensor<?xf32>
+ }
+ scf.yield %r2 : tensor<?xf32>
+ }
+ return %r1 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
+// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
+func.func @scf_for_with_tensor.insert_slice(
+ %A : tensor<?xf32> {bufferization.writable = false},
+ %B : tensor<?xf32> {bufferization.writable = true},
+ %C : tensor<4xf32> {bufferization.writable = false},
+ %lb : index, %ub : index, %step : index)
+ -> (tensor<?xf32>, tensor<?xf32>)
+{
+ // CHECK: %[[ALLOC_FOR_A:.*]] = memref.alloc
+ // CHECK: memref.copy %[[A]], %[[ALLOC_FOR_A]]
+
+ // CHECK: %[[svA:.*]] = memref.subview %[[ALLOC_FOR_A]][0] [4] [1]
+ // CHECK: %[[svB:.*]] = memref.subview %[[B]][0] [4] [1]
+
+ // CHECK: scf.for {{.*}}
+ // CHECK-NOT: iter_args
+ %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
+ -> (tensor<?xf32>, tensor<?xf32>)
+ {
+ // %ttA bufferizes to direct copy of %BUFFER_CAST_C into %svA
+ // CHECK: memref.copy %[[C]], %[[svA]]
+ %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // %ttB bufferizes to direct copy of %BUFFER_CAST_C into %BUFFER_CAST_B
+ // CHECK: memref.copy %[[C]], %[[svB]]
+ %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // CHECK-NOT: scf.yield
+ scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
+ }
+
+ // CHECK: return %[[ALLOC_FOR_A]] : memref<?xf32>
+ return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @execute_region_with_conflict(
+// CHECK-SAME: %[[m1:.*]]: memref<?xf32
+func.func @execute_region_with_conflict(
+ %t1 : tensor<?xf32> {bufferization.writable = "true"})
+ -> (f32, tensor<?xf32>, f32)
+{
+ %f1 = arith.constant 0.0 : f32
+ %idx = arith.constant 7 : index
+
+ // scf.execute_region is canonicalized away after bufferization. So just the
+ // memref.store is left over.
+
+ // CHECK: %[[alloc:.*]] = memref.alloc
+ // CHECK: memref.copy %[[m1]], %[[alloc]]
+ // CHECK: memref.store %{{.*}}, %[[alloc]][%{{.*}}]
+ %0, %1, %2 = scf.execute_region -> (f32, tensor<?xf32>, f32) {
+ %t2 = tensor.insert %f1 into %t1[%idx] : tensor<?xf32>
+ scf.yield %f1, %t2, %f1 : f32, tensor<?xf32>, f32
+ }
+
+ // CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
+ // CHECK: %[[load:.*]] = memref.load %[[m1]]
+ %3 = tensor.extract %t1[%idx] : tensor<?xf32>
+
+ // CHECK: return %{{.*}}, %[[casted]], %[[load]] : f32, memref<?xf32, #{{.*}}>, f32
+ return %0, %1, %3 : f32, tensor<?xf32>, f32
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inplace(
+// CHECK-SAME: %[[cond:.*]]: i1, %[[t1:.*]]: memref<?xf32{{.*}}>, %[[v:.*]]: vector
+func.func @scf_if_inplace(%cond: i1,
+ %t1: tensor<?xf32> {bufferization.writable = true},
+ %v: vector<5xf32>, %idx: index) -> tensor<?xf32> {
+
+ // CHECK: scf.if %[[cond]] {
+ // CHECK-NEXT: } else {
+ // CHECK-NEXT: vector.transfer_write %[[v]], %[[t1]]
+ // CHECK-NEXT: }
+ // CHECK-NEXT: return
+ %r = scf.if %cond -> (tensor<?xf32>) {
+ scf.yield %t1 : tensor<?xf32>
+ } else {
+ %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+ scf.yield %t2 : tensor<?xf32>
+ }
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_inside_scf_for
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index
+// CHECK: scf.for %{{.*}} = %[[c0]] to %[[c10]] step %[[c1]] {
+// CHECK: scf.if %{{.*}} {
+// CHECK: } else {
+// CHECK: vector.transfer_write
+// CHECK: }
+// CHECK: }
+func.func @scf_if_inside_scf_for(
+ %t1: tensor<?xf32> {bufferization.writable = true},
+ %v: vector<5xf32>, %idx: index,
+ %cond: i1)
+ -> tensor<?xf32>
+{
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %r = scf.for %iv = %c0 to %c10 step %c1 iter_args(%bb = %t1) -> (tensor<?xf32>) {
+ %r2 = scf.if %cond -> (tensor<?xf32>) {
+ scf.yield %bb : tensor<?xf32>
+ } else {
+ %t2 = vector.transfer_write %v, %bb[%idx] : vector<5xf32>, tensor<?xf32>
+ scf.yield %t2 : tensor<?xf32>
+ }
+ scf.yield %r2 : tensor<?xf32>
+ }
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_if_non_equiv_yields(
+// CHECK-SAME: %[[cond:.*]]: i1, %[[A:.*]]: memref<{{.*}}>, %[[B:.*]]: memref<{{.*}}>) -> memref<{{.*}}>
+func.func @scf_if_non_equiv_yields(
+ %b : i1,
+ %A : tensor<4xf32> {bufferization.writable = false},
+ %B : tensor<4xf32> {bufferization.writable = false})
+ -> tensor<4xf32>
+{
+ // CHECK: %[[r:.*]] = arith.select %[[cond]], %[[A]], %[[B]]
+ %r = scf.if %b -> (tensor<4xf32>) {
+ scf.yield %A : tensor<4xf32>
+ } else {
+ scf.yield %B : tensor<4xf32>
+ }
+ // CHECK: return %[[r]]
+ return %r: tensor<4xf32>
+}
+
+// -----
+
+// Note: This bufferization is inefficient, but it bufferizes correctly.
+
+// CHECK-LABEL: func @scf_execute_region_yield_non_equivalent(
+// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}})
+// CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]]
+// CHECK: memref.dealloc %[[alloc]]
+// CHECK: %[[r:.*]] = memref.load %[[clone]][%{{.*}}]
+// CHECK: memref.dealloc %[[clone]]
+// CHECK: return %[[r]]
+func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 {
+ %r = scf.execute_region -> (tensor<?xf32>) {
+ %t2 = linalg.init_tensor [%i] : tensor<?xf32>
+ scf.yield %t2 : tensor<?xf32>
+ }
+ %f = tensor.extract %r[%j] : tensor<?xf32>
+ return %f : f32
+}
+
+// -----
+
+// Note: This bufferizes to inefficient code, but bufferization should not see
+// such IR in the first place. The iter_arg would canonicalize away. This test
+// case is just to ensure that the bufferization generates correct code.
+
+// CHECK-LABEL: func @scf_for_yield_non_equivalent(
+// CHECK-SAME: %[[t:.*]]: memref<?xf32
+// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}})
+// CHECK: %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[alloc]])
+// CHECK: memref.dealloc %[[iter]]
+// CHECK: %[[alloc2:.*]] = memref.alloc(%{{.*}})
+// CHECK: memref.copy %[[t]], %[[alloc2]]
+// CHECK: scf.yield %[[alloc2]]
+// CHECK: return %[[for]]
+func.func @scf_for_yield_non_equivalent(
+ %t: tensor<?xf32>, %lb : index, %ub : index, %step : index) -> tensor<?xf32> {
+ %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor<?xf32> {
+ scf.yield %t : tensor<?xf32>
+ }
+
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// Note: This bufferizes to inefficient code, but bufferization should not see
+// such IR in the first place. The iter_arg would canonicalize away. This test
+// case is just to ensure that the bufferization generates correct code.
+
+// CHECK-LABEL: func @scf_for_yield_allocation(
+// CHECK-SAME: %[[t:.*]]: memref<?xf32
+// CHECK: %[[cloned:.*]] = bufferization.clone %[[t]]
+// CHECK: %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[cloned]])
+// This alloc is for the linalg.init_tensor.
+// CHECK-DAG: %[[alloc2:.*]] = memref.alloc(%{{.*}})
+// CHECK-DAG: memref.dealloc %[[iter]]
+// This alloc is for the scf.yield.
+// CHECK: %[[alloc3:.*]] = memref.alloc(%{{.*}})
+// CHECK: memref.copy %[[alloc2]], %[[alloc3]]
+// CHECK: memref.dealloc %[[alloc2]]
+// CHECK: %[[casted3:.*]] = memref.cast %[[alloc3]]
+// CHECK: scf.yield %[[casted3]]
+// CHECK: return %[[for]]
+func.func @scf_for_yield_allocation(%t: tensor<?xf32>, %lb : index, %ub : index,
+ %step : index) -> tensor<?xf32> {
+ %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor<?xf32> {
+ %t2 = linalg.init_tensor [%i] : tensor<?xf32>
+ scf.yield %t2 : tensor<?xf32>
+ }
+
+ return %r : tensor<?xf32>
+}
+
+// -----
+
+// TODO: The scf.yield could bufferize to 1 alloc and 2 copies (instead of
+// 2 allocs and 2 copies).
+
+// CHECK-LABEL: func @scf_for_swapping_yields(
+// CHECK-SAME: %[[A:.*]]: memref<?xf32, #{{.*}}>, %[[B:.*]]: memref<?xf32, #{{.*}}>
+func.func @scf_for_swapping_yields(
+ %A : tensor<?xf32>, %B : tensor<?xf32> {bufferization.writable = true},
+ %C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
+ -> (f32, f32)
+{
+// CHECK-DAG: %[[clone1:.*]] = bufferization.clone %[[A]]
+// CHECK-DAG: %[[clone2:.*]] = bufferization.clone %[[B]]
+// CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[clone1]], %[[iter2:.*]] = %[[clone2]])
+ %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
+ -> (tensor<?xf32>, tensor<?xf32>)
+ {
+// CHECK: %[[sv1:.*]] = memref.subview %[[iter1]]
+// CHECK: memref.copy %{{.*}}, %[[sv1]]
+ %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
+// CHECK: %[[sv2:.*]] = memref.subview %[[iter2]]
+// CHECK: memref.copy %{{.*}}, %[[sv2]]
+ %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+// CHECK: %[[alloc2:.*]] = memref.alloc(%{{.*}})
+// CHECK: memref.copy %[[iter2]], %[[alloc2]]
+// CHECK: memref.dealloc %[[iter2]]
+// CHECK: %[[alloc1:.*]] = memref.alloc(%{{.*}})
+// CHECK: memref.copy %[[iter1]], %[[alloc1]]
+// CHECK: memref.dealloc %[[iter1]]
+// CHECK: %[[casted1:.*]] = memref.cast %[[alloc1]]
+// CHECK: %[[casted2:.*]] = memref.cast %[[alloc2]]
+// CHECK: scf.yield %[[casted2]], %[[casted1]]
+ // Yield tensors in different order.
+ scf.yield %ttB, %ttA : tensor<?xf32>, tensor<?xf32>
+ }
+
+// CHECK: %[[r0:.*]] = memref.load %[[for]]#0
+// CHECK: memref.dealloc %[[for]]#0
+// CHECK: %[[r1:.*]] = memref.load %[[for]]#1
+// CHECK: memref.dealloc %[[for]]#1
+ %f0 = tensor.extract %r0#0[%step] : tensor<?xf32>
+ %f1 = tensor.extract %r0#1[%step] : tensor<?xf32>
+// CHECK: return %[[r0]], %[[r1]]
+ return %f0, %f1: f32, f32
+}
More information about the Mlir-commits
mailing list