[Mlir-commits] [mlir] 940a3f6 - [mlir][bufferize][NFC] Clean up test cases
Matthias Springer
llvmlistbot at llvm.org
Sat Apr 23 02:03:58 PDT 2022
Author: Matthias Springer
Date: 2022-04-23T18:00:55+09:00
New Revision: 940a3f6b3d1dc37f55497db0e4112426bef78828
URL: https://github.com/llvm/llvm-project/commit/940a3f6b3d1dc37f55497db0e4112426bef78828
DIFF: https://github.com/llvm/llvm-project/commit/940a3f6b3d1dc37f55497db0e4112426bef78828.diff
LOG: [mlir][bufferize][NFC] Clean up test cases
Run `one-shot-bufferize` instead of `linalg-comprehensive-module-bufferize` and move some test cases to their respective dialects.
Differential Revision: https://reviews.llvm.org/D124323
Added:
mlir/test/Dialect/Arithmetic/one-shot-bufferize.mlir
mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
Modified:
Removed:
mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
################################################################################
diff --git a/mlir/test/Dialect/Arithmetic/one-shot-bufferize.mlir b/mlir/test/Dialect/Arithmetic/one-shot-bufferize.mlir
new file mode 100644
index 0000000000000..4523981ea3221
--- /dev/null
+++ b/mlir/test/Dialect/Arithmetic/one-shot-bufferize.mlir
@@ -0,0 +1,61 @@
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s
+
+// Run fuzzer with different seeds.
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
+
+// Test bufferization using memref types that have no layout map.
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs fully-dynamic-layout-maps=0 bufferize-function-boundaries" -split-input-file -o /dev/null
+
+// CHECK-LABEL: func @write_to_select_op_source
+// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
+func.func @write_to_select_op_source(
+ %t1 : tensor<?xf32> {bufferization.writable = true},
+ %t2 : tensor<?xf32> {bufferization.writable = true},
+ %c : i1)
+ -> (tensor<?xf32>, tensor<?xf32>)
+{
+ %cst = arith.constant 0.0 : f32
+ %idx = arith.constant 0 : index
+ // CHECK: %[[alloc:.*]] = memref.alloc
+ // CHECK: memref.copy %[[t1]], %[[alloc]]
+ // CHECK: memref.store %{{.*}}, %[[alloc]]
+ %w = tensor.insert %cst into %t1[%idx] : tensor<?xf32>
+ // CHECK: %[[select:.*]] = arith.select %{{.*}}, %[[t1]], %[[t2]]
+ %s = arith.select %c, %t1, %t2 : tensor<?xf32>
+ // CHECK: return %[[select]], %[[alloc]]
+ return %s, %w : tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// Due to the out-of-place bufferization of %t1, buffers with different layout
+// maps are passed to arith.select. A cast must be inserted.
+
+// CHECK-LABEL: func @write_after_select_read_one
+// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
+func.func @write_after_select_read_one(
+ %t1 : tensor<?xf32> {bufferization.writable = true},
+ %t2 : tensor<?xf32> {bufferization.writable = true},
+ %c : i1)
+ -> (f32, tensor<?xf32>)
+{
+ %cst = arith.constant 0.0 : f32
+ %idx = arith.constant 0 : index
+
+ // CHECK: %[[alloc:.*]] = memref.alloc
+ // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
+ // CHECK-DAG: memref.copy %[[t1]], %[[alloc]]
+ // CHECK: %[[select:.*]] = arith.select %{{.*}}, %[[casted]], %[[t2]]
+ %s = arith.select %c, %t1, %t2 : tensor<?xf32>
+
+ // CHECK: memref.store %{{.*}}, %[[select]]
+ %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
+
+ // CHECK: %[[f:.*]] = memref.load %[[t1]]
+ %f = tensor.extract %t1[%idx] : tensor<?xf32>
+
+ // CHECK: return %[[f]], %[[select]]
+ return %f, %w : f32, tensor<?xf32>
+}
diff --git a/mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
similarity index 99%
rename from mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
rename to mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
index 18f379041c5e5..b48a8af13eded 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=test-analysis-only -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only bufferize-function-boundaries" -split-input-file | FileCheck %s
/// All combinations of matmul(fill(extract(init_tensor)), fill(extract(%init_tensor)), %arg2)
/// These should all be inplaceable except the first op.
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
similarity index 53%
rename from mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
rename to mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
index 451ad9c174ad0..7a1072c75d234 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -1,12 +1,14 @@
-// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=allow-return-allocs -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
-// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
// Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs fully-dynamic-layout-maps=0 bufferize-function-boundaries" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+
+// TODO: Some test cases from this file should be moved to other dialects.
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
@@ -32,19 +34,6 @@ func.func @fill_inplace(
// -----
-// CHECK-LABEL: func @tensor_extract(%{{.*}}: memref<?xf32, #{{.*}}>) -> f32 {
-func.func @tensor_extract(%A : tensor<?xf32> {bufferization.writable = false}) -> (f32) {
- %c0 = arith.constant 0 : index
-
-// CHECK: %[[RES:.*]] = memref.load {{.*}} : memref<?xf32, #{{.*}}>
- %0 = tensor.extract %A[%c0] : tensor<?xf32>
-
-// CHECK: return %[[RES]] : f32
- return %0 : f32
-}
-
-// -----
-
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
/// No bufferization.writable flag, must allocate.
@@ -160,138 +149,6 @@ func.func @vec_not_inplace(
// -----
-// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @insert_slice_fun
-// CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
-// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
-// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>,
-// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
-func.func @insert_slice_fun(
- %A0 : tensor<?xf32> {bufferization.writable = false},
- %A1 : tensor<?xf32> {bufferization.writable = true},
- %t0 : tensor<4xf32> {bufferization.writable = false},
- %t1 : tensor<4xf32> {bufferization.writable = true})
- -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
-{
- // Hoisted allocs.
- // CHECK: %[[REALLOC1:.*]] = memref.alloc
- // CHECK: %[[REALLOC2:.*]] = memref.alloc
- // CHECK: %[[REALLOC3:.*]] = memref.alloc
-
- // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
- // CHECK: memref.copy %[[A0]], %[[REALLOC3]]
- // CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC3]]
- // CHECK: memref.copy %[[t0]], %[[SV_A0]]
- %r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
- // CHECK: memref.copy %[[A0]]
- // CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC2]]
- // CHECK: memref.copy %[[t1]], %[[SV_A0_2]]
- %r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice.
- // CHECK: memref.copy %[[A1]]
- // CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC1]]
- // CHECK: memref.copy %[[t0]], %[[SV_A1]]
- %r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // Do not realloc the large tensor. Copy the tensor.extract_slice.
- // CHECK-NOT: alloc
- // CHECK: %[[SV_A1_2:.*]] = memref.subview %[[A1]]
- // CHECK: memref.copy %[[t1]], %[[SV_A1_2]]
- %r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // CHECK: return %[[REALLOC3]], %[[REALLOC2]], %[[REALLOC1]] :
- // CHECK-SAME: memref<?xf32>, memref<?xf32>, memref<?xf32>
- return %r0, %r1, %r2, %r3: tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @insert_slice_fun
-// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
-// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
-func.func @insert_slice_fun(
- %A : tensor<?xf32> {bufferization.writable = true},
- %t : tensor<4xf32> {bufferization.writable = false})
- -> tensor<?xf32>
-{
- %f0 = arith.constant 0.0 : f32
-
- // CHECK-NOT: alloc
- // CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
- // CHECK: memref.copy %[[t]], %[[SV_A]]
- %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- /// Overwrite A inplace.
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
- %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor<?xf32>) -> tensor<?xf32>
-
- // CHECK: return
- // CHECK-NOT: tensor
- return %r1: tensor<?xf32>
-}
-
-// -----
-
-// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @insert_slice_fun
-// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
-// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
-func.func @insert_slice_fun(
- %A : tensor<?xf32> {bufferization.writable = true},
- %t : tensor<4xf32> {bufferization.writable = false})
- -> tensor<?xf32>
-{
- %f0 = arith.constant 0.0 : f32
-
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
- %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor<?xf32>) -> tensor<?xf32>
-
- // CHECK-NOT: alloc
- // CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
- /// Overwrite A inplace by copying into the subview.
- // CHECK: memref.copy %[[t]], %[[SV_A]]
- %r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // CHECK: return
- // CHECK-NOT: tensor
- return %r1: tensor<?xf32>
-}
-
-// -----
-
-// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @insert_slice_fun_not_inplace
-// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
-// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
-func.func @insert_slice_fun_not_inplace(
- %A : tensor<?xf32> {bufferization.writable = false},
- %t : tensor<4xf32> {bufferization.writable = false})
- -> tensor<?xf32>
-{
- // CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) {alignment = 128 : i64} : memref<?xf32>
- // CHECK: memref.copy %[[A]], %[[ALLOC]] : memref<?xf32{{.*}} to memref<?xf32>
- // CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref<?xf32> to memref<4xf32>
- // CHECK: memref.copy %[[t]], %[[SV]] : memref<4xf32, #map> to memref<4xf32>
- %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // CHECK: return %{{.*}} : memref<?xf32>
- return %r0: tensor<?xf32>
-}
-
-// -----
-
-//===----------------------------------------------------------------------===//
-// Cross function boundary cases.
-//===----------------------------------------------------------------------===//
-
// CHECK: func @matmul(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<128x256xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<256x192xf32>
@@ -364,29 +221,7 @@ func.func @matmul(
// -----
-// CHECK-LABEL: func @tensor_cast_not_in_place(
-// CHECK-SAME: %[[A:.*]]: memref<?xf32{{.*}}>, %[[B:.*]]: memref<?xf32{{.*}}>
-// CHECK: %[[alloc:.*]] = memref.alloc
-// CHECK: memref.copy %[[A]], %[[alloc]]
-// CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
-// CHECK: memref.copy %[[alloc]], %[[subview]]
-func.func @tensor_cast_not_in_place(
- %A : tensor<?xf32> {bufferization.writable = true},
- %B : tensor<?xf32> {bufferization.writable = false}, %idx: index)
- -> (tensor<?xf32>)
-{
- %r0 = tensor.cast %A : tensor<?xf32> to tensor<4xf32>
- %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
- return %r1 : tensor<?xf32>
-}
-
-// -----
-
-//===----------------------------------------------------------------------===//
-// Insertion point cases.
-//===----------------------------------------------------------------------===//
-
-/// These tests just check the produced IR is valid and does not have dominance
+/// This test just checks the produced IR is valid and does not have dominance
/// errors in the def-use chains.
// CHECK-LABEL: func @dominance_violation_bug_1
@@ -406,19 +241,6 @@ func.func @dominance_violation_bug_1(
return %rA : tensor<?x?xf32>
}
-
-// -----
-
-// CHECK-LABEL: func @insert_op
-// CHECK-SAME: %[[t1:.*]]: memref<?xf32, {{.*}}>, %[[s:.*]]: f32, %[[i:.*]]: index
-func.func @insert_op(%t1 : tensor<?xf32> {bufferization.writable = true},
- %s : f32, %i : index) -> tensor<?xf32> {
- // CHECK: memref.store %[[s]], %[[t1]][%[[i]]]
- %0 = tensor.insert %s into %t1[%i] : tensor<?xf32>
- // CHECK: return
- return %0 : tensor<?xf32>
-}
-
// -----
func.func @gather_like(
@@ -537,85 +359,3 @@ func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x
return %3 : tensor<?x1x6x8xf32>
}
-// -----
-
-// CHECK-LABEL: func @write_to_select_op_source
-// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
-func.func @write_to_select_op_source(
- %t1 : tensor<?xf32> {bufferization.writable = true},
- %t2 : tensor<?xf32> {bufferization.writable = true},
- %c : i1)
- -> (tensor<?xf32>, tensor<?xf32>)
-{
- %cst = arith.constant 0.0 : f32
- %idx = arith.constant 0 : index
- // CHECK: %[[alloc:.*]] = memref.alloc
- // CHECK: memref.copy %[[t1]], %[[alloc]]
- // CHECK: memref.store %{{.*}}, %[[alloc]]
- %w = tensor.insert %cst into %t1[%idx] : tensor<?xf32>
- // CHECK: %[[select:.*]] = arith.select %{{.*}}, %[[t1]], %[[t2]]
- %s = arith.select %c, %t1, %t2 : tensor<?xf32>
- // CHECK: return %[[select]], %[[alloc]]
- return %s, %w : tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @write_after_select_read_one
-// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
-func.func @write_after_select_read_one(
- %t1 : tensor<?xf32> {bufferization.writable = true},
- %t2 : tensor<?xf32> {bufferization.writable = true},
- %c : i1)
- -> (f32, tensor<?xf32>)
-{
- %cst = arith.constant 0.0 : f32
- %idx = arith.constant 0 : index
-
- // CHECK: %[[alloc:.*]] = memref.alloc
- // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
- // CHECK-DAG: memref.copy %[[t1]], %[[alloc]]
- // CHECK: %[[select:.*]] = arith.select %{{.*}}, %[[casted]], %[[t2]]
- %s = arith.select %c, %t1, %t2 : tensor<?xf32>
-
- // CHECK: memref.store %{{.*}}, %[[select]]
- %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
-
- // CHECK: %[[f:.*]] = memref.load %[[t1]]
- %f = tensor.extract %t1[%idx] : tensor<?xf32>
-
- // CHECK: return %[[f]], %[[select]]
- return %f, %w : f32, tensor<?xf32>
-}
-
-// -----
-
-// A regression test to make sure that we handle rank-reducing extract_slice
-// correctly.
-
-// CHECK-LABEL: func @rank_reducing
-func.func @rank_reducing(
- %i: index, %j: index,
- %arg0: tensor<8x18x32xf32>)
- -> tensor<?x1x6x8xf32> {
- %c1 = arith.constant 1 : index
- %c6 = arith.constant 6 : index
- %c8 = arith.constant 8 : index
- %c32 = arith.constant 32 : index
- %c0 = arith.constant 0 : index
- %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32>
- %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor<?x1x6x8xf32>
- %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32>
- %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor<?x1x6x8xf32>) {
- %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7)
- %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32>
- %9 = scf.for %arg9 = %c0 to %c6 step %c1 iter_args(%arg10 = %2) -> (tensor<1x6x8xf32>) {
- %11 = tensor.extract_slice %8[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x6x8xf32> to tensor<1x1x8xf32>
- %12 = tensor.insert_slice %11 into %arg10[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x1x8xf32> into tensor<1x6x8xf32>
- scf.yield %12 : tensor<1x6x8xf32>
- }
- %10 = tensor.insert_slice %9 into %arg8[%7, 0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : tensor<1x6x8xf32> into tensor<?x1x6x8xf32>
- scf.yield %10 : tensor<?x1x6x8xf32>
- }
- return %5: tensor<?x1x6x8xf32>
-}
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
new file mode 100644
index 0000000000000..c9a7afd76fbb9
--- /dev/null
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -0,0 +1,197 @@
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s
+
+// Run fuzzer with different seeds.
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
+
+// Test bufferization using memref types that have no layout map.
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs fully-dynamic-layout-maps=0 bufferize-function-boundaries" -split-input-file -o /dev/null
+
+// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @insert_slice_fun
+// CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
+// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
+// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>,
+// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
+func.func @insert_slice_fun(
+ %A0 : tensor<?xf32> {bufferization.writable = false},
+ %A1 : tensor<?xf32> {bufferization.writable = true},
+ %t0 : tensor<4xf32> {bufferization.writable = false},
+ %t1 : tensor<4xf32> {bufferization.writable = true})
+ -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
+{
+ // Hoisted allocs.
+ // CHECK: %[[REALLOC1:.*]] = memref.alloc
+ // CHECK: %[[REALLOC2:.*]] = memref.alloc
+ // CHECK: %[[REALLOC3:.*]] = memref.alloc
+
+ // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
+ // CHECK: memref.copy %[[A0]], %[[REALLOC3]]
+ // CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC3]]
+ // CHECK: memref.copy %[[t0]], %[[SV_A0]]
+ %r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
+ // CHECK: memref.copy %[[A0]]
+ // CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC2]]
+ // CHECK: memref.copy %[[t1]], %[[SV_A0_2]]
+ %r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice.
+ // CHECK: memref.copy %[[A1]]
+ // CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC1]]
+ // CHECK: memref.copy %[[t0]], %[[SV_A1]]
+ %r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // Do not realloc the large tensor. Copy the tensor.extract_slice.
+ // CHECK-NOT: alloc
+ // CHECK: %[[SV_A1_2:.*]] = memref.subview %[[A1]]
+ // CHECK: memref.copy %[[t1]], %[[SV_A1_2]]
+ %r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // CHECK: return %[[REALLOC3]], %[[REALLOC2]], %[[REALLOC1]] :
+ // CHECK-SAME: memref<?xf32>, memref<?xf32>, memref<?xf32>
+ return %r0, %r1, %r2, %r3: tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @insert_slice_fun
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
+// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
+func.func @insert_slice_fun(
+ %A : tensor<?xf32> {bufferization.writable = true},
+ %t : tensor<4xf32> {bufferization.writable = false})
+ -> tensor<?xf32>
+{
+ %f0 = arith.constant 0.0 : f32
+
+ // CHECK-NOT: alloc
+ // CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
+ // CHECK: memref.copy %[[t]], %[[SV_A]]
+ %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ /// Overwrite A inplace.
+ // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
+ %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor<?xf32>) -> tensor<?xf32>
+
+ // CHECK: return
+ // CHECK-NOT: tensor
+ return %r1: tensor<?xf32>
+}
+
+// -----
+
+// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @insert_slice_fun
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
+// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
+func.func @insert_slice_fun(
+ %A : tensor<?xf32> {bufferization.writable = true},
+ %t : tensor<4xf32> {bufferization.writable = false})
+ -> tensor<?xf32>
+{
+ %f0 = arith.constant 0.0 : f32
+
+ // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
+ %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor<?xf32>) -> tensor<?xf32>
+
+ // CHECK-NOT: alloc
+ // CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
+ /// Overwrite A inplace by copying into the subview.
+ // CHECK: memref.copy %[[t]], %[[SV_A]]
+ %r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // CHECK: return
+ // CHECK-NOT: tensor
+ return %r1: tensor<?xf32>
+}
+
+// -----
+
+// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @insert_slice_fun_not_inplace
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
+// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
+func.func @insert_slice_fun_not_inplace(
+ %A : tensor<?xf32> {bufferization.writable = false},
+ %t : tensor<4xf32> {bufferization.writable = false})
+ -> tensor<?xf32>
+{
+ // CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) {alignment = 128 : i64} : memref<?xf32>
+ // CHECK: memref.copy %[[A]], %[[ALLOC]] : memref<?xf32{{.*}} to memref<?xf32>
+ // CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref<?xf32> to memref<4xf32>
+ // CHECK: memref.copy %[[t]], %[[SV]] : memref<4xf32, #map> to memref<4xf32>
+ %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+
+ // CHECK: return %{{.*}} : memref<?xf32>
+ return %r0: tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_cast_not_in_place(
+// CHECK-SAME: %[[A:.*]]: memref<?xf32{{.*}}>, %[[B:.*]]: memref<?xf32{{.*}}>
+// CHECK: %[[alloc:.*]] = memref.alloc
+// CHECK: memref.copy %[[A]], %[[alloc]]
+// CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
+// CHECK: memref.copy %[[alloc]], %[[subview]]
+func.func @tensor_cast_not_in_place(
+ %A : tensor<?xf32> {bufferization.writable = true},
+ %B : tensor<?xf32> {bufferization.writable = false}, %idx: index)
+ -> (tensor<?xf32>)
+{
+ %r0 = tensor.cast %A : tensor<?xf32> to tensor<4xf32>
+ %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
+ return %r1 : tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @insert_op
+// CHECK-SAME: %[[t1:.*]]: memref<?xf32, {{.*}}>, %[[s:.*]]: f32, %[[i:.*]]: index
+func.func @insert_op(%t1 : tensor<?xf32> {bufferization.writable = true},
+ %s : f32, %i : index) -> tensor<?xf32> {
+ // CHECK: memref.store %[[s]], %[[t1]][%[[i]]]
+ %0 = tensor.insert %s into %t1[%i] : tensor<?xf32>
+ // CHECK: return
+ return %0 : tensor<?xf32>
+}
+
+// -----
+
+// A regression test to make sure that we handle rank-reducing extract_slice
+// correctly.
+
+// CHECK-LABEL: func @rank_reducing
+func.func @rank_reducing(
+ %i: index, %j: index,
+ %arg0: tensor<8x18x32xf32>)
+ -> tensor<?x1x6x8xf32> {
+ %c1 = arith.constant 1 : index
+ %c6 = arith.constant 6 : index
+ %c8 = arith.constant 8 : index
+ %c32 = arith.constant 32 : index
+ %c0 = arith.constant 0 : index
+ %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32>
+ %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor<?x1x6x8xf32>
+ %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32>
+ %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor<?x1x6x8xf32>) {
+ %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7)
+ %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32>
+ %9 = scf.for %arg9 = %c0 to %c6 step %c1 iter_args(%arg10 = %2) -> (tensor<1x6x8xf32>) {
+ %11 = tensor.extract_slice %8[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x6x8xf32> to tensor<1x1x8xf32>
+ %12 = tensor.insert_slice %11 into %arg10[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x1x8xf32> into tensor<1x6x8xf32>
+ scf.yield %12 : tensor<1x6x8xf32>
+ }
+ %10 = tensor.insert_slice %9 into %arg8[%7, 0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : tensor<1x6x8xf32> into tensor<?x1x6x8xf32>
+ scf.yield %10 : tensor<?x1x6x8xf32>
+ }
+ return %5: tensor<?x1x6x8xf32>
+}
More information about the Mlir-commits
mailing list