[Mlir-commits] [mlir] b6e7b1b - [mlir][linalg] Simplify padding test (NFC).
Tobias Gysi
llvmlistbot at llvm.org
Wed Nov 24 11:26:29 PST 2021
Author: Tobias Gysi
Date: 2021-11-24T19:21:50Z
New Revision: b6e7b1be732d2b35c1452731883de812a818ff2a
URL: https://github.com/llvm/llvm-project/commit/b6e7b1be732d2b35c1452731883de812a818ff2a
DIFF: https://github.com/llvm/llvm-project/commit/b6e7b1be732d2b35c1452731883de812a818ff2a.diff
LOG: [mlir][linalg] Simplify padding test (NFC).
The padding tests previously contained the tile loops. This revision removes the tile loops since padding itself does not consider the loops. Instead the induction variables are passed in as function arguments which promotes them to symbols in the affine expressions. Note that the pad-and-hoist.mlir test still exercises padding in the context of the full loop nest.
Depends On D114175
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D114227
Added:
Modified:
mlir/test/Dialect/Linalg/pad.mlir
Removed:
################################################################################
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
index 68734803276e4..d686f7da5a101 100644
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -1,236 +1,183 @@
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-FILL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATMUL
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad pack-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=FILL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad pack-paddings=1,1,0 pad-inputs-only run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
-// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 12)>
-// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)>
-#map = affine_map<(d0) -> (7, -d0 + 12)>
-
-// CHECK: static_sizes_output_divisible
-// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
-// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>
+// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (7, -s0 + 12)>
+// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
+#map = affine_map<()[s0] -> (7, -s0 + 12)>
+
+// MATMUL: static_sizes_output_divisible
+// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32>
+// MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>
+// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
+// MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+// MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
+// MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
- %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
- // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
- // CHECK-DAG: %[[C7:.*]] = arith.constant 7
- %c0 = arith.constant 0 : index
- %c12 = arith.constant 12 : index
- %c25 = arith.constant 25 : index
- %c24 = arith.constant 24 : index
- %c7 = arith.constant 7 : index
- %c5 = arith.constant 5 : index
- %c4 = arith.constant 4 : index
-
- // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
- %0 = scf.for %arg3 = %c0 to %c24 step %c4 iter_args(%arg4 = %arg2) -> (tensor<24x25xf32>) {
-
- // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
- %1 = scf.for %arg5 = %c0 to %c25 step %c5 iter_args(%arg6 = %arg4) -> (tensor<24x25xf32>) {
-
- // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] =
- %2 = scf.for %arg7 = %c0 to %c12 step %c7 iter_args(%arg8 = %arg6) -> (tensor<24x25xf32>) {
-
- // CHECK: %[[TS2:.*]] = affine.min #[[MAP0]](%[[IV2]])
- %3 = affine.min #map(%arg7)
-
- // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
- // CHECK: %[[T1:.*]] = tensor.extract_slice %[[ARG1]]
- // CHECK: %[[T2:.*]] = tensor.extract_slice %[[ARG4]]
- %4 = tensor.extract_slice %arg0[%arg3, %arg7] [4, %3] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
- %5 = tensor.extract_slice %arg1[%arg7, %arg5] [%3, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
- %6 = tensor.extract_slice %arg8[%arg3, %arg5] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
-
- // Check statically sized matmul inputs with partially divisible sizes are padded.
- // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]](%[[TS2]])
- // CHECK: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold
- // CHECK-SAME: [%[[C0]], %[[C0]]]
- // CHECK-SAME: [%[[C0]], %[[V0]]
- // CHECK: %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold
-
- // Check the statically sized matmul output with fully divisible sizes is not padded.
- // CHECK: %[[T5:.*]] = linalg.matmul
- // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
- // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
- // CHECK: %[[T6:.*]] = tensor.insert_slice %[[T5]]
- %7 = linalg.matmul ins(%4, %5 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%6 : tensor<4x5xf32>) -> tensor<4x5xf32>
- %8 = tensor.insert_slice %7 into %arg8[%arg3, %arg5] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
-
- // CHECK: scf.yield %[[T6]]
- scf.yield %8 : tensor<24x25xf32>
- }
- scf.yield %2 : tensor<24x25xf32>
- }
- scf.yield %1 : tensor<24x25xf32>
- }
- return %0 : tensor<24x25xf32>
+ %arg2: tensor<24x25xf32>,
+ %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+ // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
+
+ // MATMUL: %[[TS2:.*]] = affine.min #[[MAP0]]()[%[[IV2]]]
+ %0 = affine.min #map()[%iv2]
+
+ // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
+ // MATMUL: %[[T1:.*]] = tensor.extract_slice %[[ARG1]]
+ // MATMUL: %[[T2:.*]] = tensor.extract_slice %[[ARG2]]
+ %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
+ %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
+ %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
+
+ // Check statically sized matmul inputs with partially divisible sizes are padded.
+ // MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS2]]]
+ // MATMUL: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold
+ // MATMUL-SAME: [%[[C0]], %[[C0]]]
+ // MATMUL-SAME: [%[[C0]], %[[V0]]
+ // MATMUL: %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold
+
+ // Check the statically sized matmul output with fully divisible sizes is not padded.
+ // MATMUL: %[[T5:.*]] = linalg.matmul
+ // MATMUL-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
+ // MATMUL-SAME: outs(%[[T2]] : tensor<4x5xf32>)
+ // MATMUL: %[[T6:.*]] = tensor.insert_slice %[[T5]]
+ %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+ %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
+ return %5 : tensor<24x25xf32>
}
// -----
-// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 25)>
-// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)>
-#map = affine_map<(d0) -> (7, -d0 + 25)>
+// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (7, -s0 + 25)>
+// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
+#map = affine_map<()[s0] -> (7, -s0 + 25)>
-// CHECK: static_sizes_input_divisible
+// MATMUL: static_sizes_input_divisible
+// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>
+// MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+// MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
+// MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
- %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
- // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
- // CHECK-DAG: %[[C7:.*]] = arith.constant 7
- %c0 = arith.constant 0 : index
- %c12 = arith.constant 12 : index
- %c25 = arith.constant 25 : index
- %c24 = arith.constant 24 : index
- %c6 = arith.constant 6 : index
- %c7 = arith.constant 7 : index
- %c4 = arith.constant 4 : index
-
- // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
- %0 = scf.for %arg3 = %c0 to %c24 step %c4 iter_args(%arg4 = %arg2) -> (tensor<24x25xf32>) {
-
- // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
- %1 = scf.for %arg5 = %c0 to %c25 step %c7 iter_args(%arg6 = %arg4) -> (tensor<24x25xf32>) {
-
- // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] =
- %2 = scf.for %arg7 = %c0 to %c12 step %c6 iter_args(%arg8 = %arg6) -> (tensor<24x25xf32>) {
- %3 = tensor.extract_slice %arg0[%arg3, %arg7] [4, 6] [1, 1] : tensor<24x12xf32> to tensor<4x6xf32>
-
- // CHECK: %[[TS1:.*]] = affine.min #[[MAP0]](%[[IV1]])
- %4 = affine.min #map(%arg5)
- %5 = tensor.extract_slice %arg1[%arg7, %arg5] [6, %4] [1, 1] : tensor<12x25xf32> to tensor<6x?xf32>
-
- // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG4]]
- %6 = tensor.extract_slice %arg8[%arg3, %arg5] [4, %4] [1, 1] : tensor<24x25xf32> to tensor<4x?xf32>
-
- // Check the statically sized matmul output with partially divisible sizes is padded.
- // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]](%[[TS1]])
- // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low
- // CHECK-SAME: [%[[C0]], %[[C0]]]
- // CHECK-SAME: [%[[C0]], %[[V0]]
-
- // CHECK: %[[T2:.*]] = linalg.matmul
- // CHECK-SAME: outs(%[[T1]] : tensor<4x7xf32>)
- // CHECK: %[[T3:.*]] = tensor.extract_slice %[[T2]]
- // CHECK: %[[T4:.*]] = tensor.insert_slice %[[T3]]
- %7 = linalg.matmul ins(%3, %5 : tensor<4x6xf32>, tensor<6x?xf32>) outs(%6 : tensor<4x?xf32>) -> tensor<4x?xf32>
- %8 = tensor.insert_slice %7 into %arg8[%arg3, %arg5] [4, %4] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
-
- // CHECK: scf.yield %[[T4]]
- scf.yield %8 : tensor<24x25xf32>
- }
- scf.yield %2 : tensor<24x25xf32>
- }
- scf.yield %1 : tensor<24x25xf32>
- }
- return %0 : tensor<24x25xf32>
+ %arg2: tensor<24x25xf32>,
+ %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
+ // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
+
+ %3 = tensor.extract_slice %arg0[%iv0, %iv2] [4, 6] [1, 1] : tensor<24x12xf32> to tensor<4x6xf32>
+
+ // MATMUL: %[[TS1:.*]] = affine.min #[[MAP0]]()[%[[IV1]]]
+ %4 = affine.min #map()[%iv1]
+ %5 = tensor.extract_slice %arg1[%iv2, %iv1] [6, %4] [1, 1] : tensor<12x25xf32> to tensor<6x?xf32>
+
+ // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG2]]
+ %6 = tensor.extract_slice %arg2[%iv0, %iv1] [4, %4] [1, 1] : tensor<24x25xf32> to tensor<4x?xf32>
+
+ // Check the statically sized matmul output with partially divisible sizes is padded.
+ // MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS1]]]
+ // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low
+ // MATMUL-SAME: [%[[C0]], %[[C0]]]
+ // MATMUL-SAME: [%[[C0]], %[[V0]]
+
+ // MATMUL: %[[T2:.*]] = linalg.matmul
+ // MATMUL-SAME: outs(%[[T1]] : tensor<4x7xf32>)
+ // MATMUL: %[[T3:.*]] = tensor.extract_slice %[[T2]]
+ // MATMUL: %[[T4:.*]] = tensor.insert_slice %[[T3]]
+ %7 = linalg.matmul ins(%3, %5 : tensor<4x6xf32>, tensor<6x?xf32>) outs(%6 : tensor<4x?xf32>) -> tensor<4x?xf32>
+ %8 = tensor.insert_slice %7 into %arg2[%iv0, %iv1] [4, %4] [1, 1] : tensor<4x?xf32> into tensor<24x25xf32>
+
+ // MATMUL: return %[[T4]]
+ return %8 : tensor<24x25xf32>
}
// -----
-// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0)[s0] -> (5, -d0 + s0)>
-// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0)[s0] -> (7, -d0 + s0)>
-// CHECK-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<(d0)[s0] -> (6, -d0 + s0)>
-// CHECK-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 5)>
-// CHECK-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 6)>
-
-#map0 = affine_map<(d0)[s0] -> (5, -d0 + s0)>
-#map1 = affine_map<(d0)[s0] -> (6, -d0 + s0)>
-#map2 = affine_map<(d0)[s0] -> (7, -d0 + s0)>
-
-// CHECK: dynamic_sizes
-// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x?xf32>
-// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<?x?xf32>
-// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<?x?xf32>
+// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0, s1] -> (5, -s0 + s1)>
+// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0, s1] -> (7, -s0 + s1)>
+// MATMUL-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<()[s0, s1] -> (6, -s0 + s1)>
+// MATMUL-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 5)>
+// MATMUL-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 6)>
+
+#map0 = affine_map<()[s0, s1] -> (5, -s0 + s1)>
+#map1 = affine_map<()[s0, s1] -> (6, -s0 + s1)>
+#map2 = affine_map<()[s0, s1] -> (7, -s0 + s1)>
+
+// MATMUL: dynamic_sizes
+// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x?xf32>
+// MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<?x?xf32>
+// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<?x?xf32>
+// MATMUL-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
+// MATMUL-SAME: %[[IV1:[0-9a-zA-Z]*]]: index
+// MATMUL-SAME: %[[IV2:[0-9a-zA-Z]*]]: index
func @dynamic_sizes(%arg0: tensor<?x?xf32>,
%arg1: tensor<?x?xf32>,
- %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
- // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
- // CHECK-DAG: %[[C1:.*]] = arith.constant 1
- // CHECK-DAG: %[[C5:.*]] = arith.constant 5
- // CHECK-DAG: %[[C6:.*]] = arith.constant 6
+ %arg2: tensor<?x?xf32>,
+ %iv0 : index, %iv1 : index, %iv2 : index) -> tensor<?x?xf32> {
+ // MATMUL-DAG: %[[C0:.*]] = arith.constant 0 : index
+ // MATMUL-DAG: %[[C1:.*]] = arith.constant 1
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
- %c6 = arith.constant 6 : index
- %c7 = arith.constant 7 : index
- %c5 = arith.constant 5 : index
- // CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]]
- // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG0]], %[[C1]]
- // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG1]], %[[C1]]
+ // MATMUL-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]]
+ // MATMUL-DAG: %[[D2:.*]] = tensor.dim %[[ARG0]], %[[C1]]
+ // MATMUL-DAG: %[[D1:.*]] = tensor.dim %[[ARG1]], %[[C1]]
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
%2 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
- // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
- %3 = scf.for %arg3 = %c0 to %0 step %c5 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) {
-
- // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
- %4 = scf.for %arg5 = %c0 to %2 step %c7 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) {
-
- // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] =
- %5 = scf.for %arg7 = %c0 to %1 step %c6 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) {
-
- // CHECK: %[[TS0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[D0]]]
- // CHECK: %[[TS2:.*]] = affine.min #[[MAP2]](%[[IV2]])[%[[D2]]]
- // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]])[%[[D1]]]
- %6 = affine.min #map0(%arg3)[%0]
- %7 = affine.min #map1(%arg7)[%1]
- %8 = tensor.extract_slice %arg0[%arg3, %arg7] [%6, %7] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
- %9 = affine.min #map2(%arg5)[%2]
- %10 = tensor.extract_slice %arg1[%arg7, %arg5] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
- %11 = tensor.extract_slice %arg8[%arg3, %arg5] [%6, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-
- // Check all matmul operands are padded.
- // CHECK: %[[V0:.*]] = affine.apply #[[MAP3]](%[[TS0]])
- // CHECK: %[[V1:.*]] = affine.apply #[[MAP4]](%[[TS2]])
- // CHECK: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold
- // CHECK-SAME: [%[[C0]], %[[C0]]]
- // CHECK-SAME: [%[[V0]], %[[V1]]
- // CHECK: %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold
- // CHECK: %[[T5:.*]] = linalg.pad_tensor %{{.*}} low
-
- // Check the dynamic matmul has been erased.
- // CHECK-NOT: = linalg.matmul {{.*}} tensor<?x?xf32>
-
- // Check all padded matmul operands are statically sized.
- // CHECK: %[[T6:.*]] = linalg.matmul
- // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<5x6xf32>, tensor<6x7xf32>)
- // CHECK-SAME: outs(%[[T5]] : tensor<5x7xf32>)
- // CHECK: %[[T7:.*]] = tensor.extract_slice %[[T6]][0, 0] [%[[TS0]], %[[TS1]]]
- // CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]]
- %12 = linalg.matmul ins(%8, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%11 : tensor<?x?xf32>) -> tensor<?x?xf32>
- %13 = tensor.insert_slice %12 into %arg8[%arg3, %arg5] [%6, %9] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
-
- // CHECK: scf.yield %[[T8]]
- scf.yield %13 : tensor<?x?xf32>
- }
- scf.yield %5 : tensor<?x?xf32>
- }
- scf.yield %4 : tensor<?x?xf32>
- }
- return %3 : tensor<?x?xf32>
+ // MATMUL: %[[TS0:.*]] = affine.min #[[MAP0]]()[%[[IV0]], %[[D0]]]
+ // MATMUL: %[[TS2:.*]] = affine.min #[[MAP2]]()[%[[IV2]], %[[D2]]]
+ // MATMUL: %[[TS1:.*]] = affine.min #[[MAP1]]()[%[[IV1]], %[[D1]]]
+ %6 = affine.min #map0()[%iv0, %0]
+ %7 = affine.min #map1()[%iv2, %1]
+ %8 = tensor.extract_slice %arg0[%iv0, %iv2] [%6, %7] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+ %9 = affine.min #map2()[%iv1, %2]
+ %10 = tensor.extract_slice %arg1[%iv2, %iv1] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+ %11 = tensor.extract_slice %arg2[%iv0, %iv1] [%6, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+
+ // Check all matmul operands are padded.
+ // MATMUL: %[[V0:.*]] = affine.apply #[[MAP3]]()[%[[TS0]]]
+ // MATMUL: %[[V1:.*]] = affine.apply #[[MAP4]]()[%[[TS2]]]
+ // MATMUL: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold
+ // MATMUL-SAME: [%[[C0]], %[[C0]]]
+ // MATMUL-SAME: [%[[V0]], %[[V1]]
+ // MATMUL: %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold
+ // MATMUL: %[[T5:.*]] = linalg.pad_tensor %{{.*}} low
+
+ // Check the dynamic matmul has been erased.
+ // MATMUL-NOT: = linalg.matmul {{.*}} tensor<?x?xf32>
+
+ // Check all padded matmul operands are statically sized.
+ // MATMUL: %[[T6:.*]] = linalg.matmul
+ // MATMUL-SAME: ins(%[[T3]], %[[T4]] : tensor<5x6xf32>, tensor<6x7xf32>)
+ // MATMUL-SAME: outs(%[[T5]] : tensor<5x7xf32>)
+ // MATMUL: %[[T7:.*]] = tensor.extract_slice %[[T6]][0, 0] [%[[TS0]], %[[TS1]]]
+ // MATMUL: %[[T8:.*]] = tensor.insert_slice %[[T7]]
+ %12 = linalg.matmul ins(%8, %10 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%11 : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %13 = tensor.insert_slice %12 into %arg2[%iv0, %iv1] [%6, %9] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+
+ // MATMUL: return %[[T8]]
+ return %13 : tensor<?x?xf32>
}
// -----
-#map0 = affine_map<(d0) -> (64, d0)>
+#map0 = affine_map<()[s0] -> (64, s0)>
-// CHECK: compose_padding
-// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
+// MATMUL: compose_padding
+// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<64x64xf32>
func @compose_padding(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
- // CHECK: %[[SIZE:.*]] = affine.min
- %size = affine.min #map0(%iv0)
+ // MATMUL: %[[SIZE:.*]] = affine.min
+ %size = affine.min #map0()[%iv0]
- // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
- // CHECK-SAME: [0, 0]
- // CHECK-SAME: [%[[SIZE]], %[[SIZE]]]
- // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]]
- // CHECK: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]]
- // CHECK: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]]
+ // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
+ // MATMUL-SAME: [0, 0]
+ // MATMUL-SAME: [%[[SIZE]], %[[SIZE]]]
+ // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]]
+ // MATMUL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]]
+ // MATMUL: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index): // no predecessors
@@ -241,28 +188,28 @@ func @compose_padding(%arg0: tensor<64x64xf32>,
%4 = tensor.extract_slice %3[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Check there are no additional pad tensor operations.
- // CHECK-NOT: linalg.pad_tensor
+ // MATMUL-NOT: linalg.pad_tensor
// Check the matmul directly uses the result of the fill operation.
- // CHECK: %[[T4:.*]] = linalg.matmul ins(%[[T3]]
- // CHECK: %[[T5:.*]] = tensor.extract_slice %[[T4]]
- // CHECK-SAME: [0, 0]
- // CHECK-SAME: [%[[SIZE]], %[[SIZE]]]
+ // MATMUL: %[[T4:.*]] = linalg.matmul ins(%[[T3]]
+ // MATMUL: %[[T5:.*]] = tensor.extract_slice %[[T4]]
+ // MATMUL-SAME: [0, 0]
+ // MATMUL-SAME: [%[[SIZE]], %[[SIZE]]]
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
- // CHECK: return %[[T5]]
+ // MATMUL: return %[[T5]]
return %5 : tensor<?x?xf32>
}
// -----
-#map0 = affine_map<(d0) -> (64, d0)>
+#map0 = affine_map<()[s0] -> (64, s0)>
-// CHECK: different_padding_values
+// MATMUL: different_padding_values
func @different_padding_values(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 42.0 : f32
- %size = affine.min #map0(%iv0)
+ %size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index): // no predecessors
@@ -272,22 +219,22 @@ func @different_padding_values(%arg0: tensor<64x64xf32>,
%4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Different padding values prevent composing the paddings (42.0 vs. 0.0).
- // CHECK: = linalg.fill
- // CHECK: = linalg.pad_tensor
- // CHECK: = linalg.matmul
+ // MATMUL: = linalg.fill
+ // MATMUL: = linalg.pad_tensor
+ // MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
}
// -----
-#map0 = affine_map<(d0) -> (64, d0)>
+#map0 = affine_map<()[s0] -> (64, s0)>
-// CHECK: different_padding_dynamic_sizes
+// MATMUL: different_padding_dynamic_sizes
func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
- %size = affine.min #map0(%iv0)
+ %size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index): // no predecessors
@@ -297,22 +244,22 @@ func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
%4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
// Different dynamic sizes prevent composing the paddings (%iv0 vs %size).
- // CHECK: = linalg.fill
- // CHECK: = linalg.pad_tensor
- // CHECK: = linalg.matmul
+ // MATMUL: = linalg.fill
+ // MATMUL: = linalg.pad_tensor
+ // MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
}
// -----
-#map0 = affine_map<(d0) -> (64, d0)>
+#map0 = affine_map<()[s0] -> (64, s0)>
-// CHECK: different_padding_static_sizes
+// MATMUL: different_padding_static_sizes
func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
%iv0 : index) -> tensor<?x?xf32> {
%cst = arith.constant 0.0 : f32
- %size = affine.min #map0(%iv0)
+ %size = affine.min #map0()[%iv0]
%0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
%1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] {
^bb0(%arg3: index, %arg4: index): // no predecessors
@@ -322,55 +269,42 @@ func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
%4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
// Different static sizes prevent composing the paddings (62 vs 64 derived from #map0).
- // CHECK: = linalg.fill
- // CHECK: = linalg.pad_tensor
- // CHECK: = linalg.matmul
+ // MATMUL: = linalg.fill
+ // MATMUL: = linalg.pad_tensor
+ // MATMUL: = linalg.matmul
%5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %5 : tensor<?x?xf32>
}
// -----
-#map = affine_map<(d0) -> (7, -d0 + 12)>
+#map0 = affine_map<()[s0] -> (7, s0)>
-// CHECK-FILL: scalar_operand
-// CHECK-FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: f32
-// CHECK-FILL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<24x12xf32>
-func @scalar_operand(%arg0: f32, %arg1: tensor<24x12xf32>) -> tensor<24x12xf32> {
- %c0 = arith.constant 0 : index
- %c12 = arith.constant 12 : index
- %c24 = arith.constant 24 : index
- %c7 = arith.constant 7 : index
- %c4 = arith.constant 4 : index
-
- // CHECK-FILL: scf.for %[[IV0:[0-9a-zA-Z]*]] =
- %0 = scf.for %arg2 = %c0 to %c24 step %c4 iter_args(%arg3 = %arg1) -> (tensor<24x12xf32>) {
-
- // CHECK-FILL: scf.for %[[IV1:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG2:.*]] =
- %1 = scf.for %arg4 = %c0 to %c12 step %c7 iter_args(%arg5 = %arg3) -> (tensor<24x12xf32>) {
- %2 = affine.min #map(%arg4)
-
- // CHECK-FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG2]]
- // CHECK-FILL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold
- %3 = tensor.extract_slice %arg5[%arg2, %arg4] [4, %2] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
-
- // Check only the fill output operand is padded.
- // CHECK-FILL: %[[T6:.*]] = linalg.fill(%[[ARG0]], %[[T1]]
- %4 = linalg.fill(%arg0, %3) : f32, tensor<4x?xf32> -> tensor<4x?xf32>
- %5 = tensor.insert_slice %4 into %arg5[%arg2, %arg4] [4, %2] [1, 1] : tensor<4x?xf32> into tensor<24x12xf32>
- scf.yield %5 : tensor<24x12xf32>
- }
- scf.yield %1 : tensor<24x12xf32>
- }
- return %0 : tensor<24x12xf32>
+// FILL: scalar_operand
+// FILL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: f32
+// FILL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<24x12xf32>
+func @scalar_operand(%arg0: f32,
+ %arg1: tensor<24x12xf32>,
+ %iv0 : index) -> tensor<24x12xf32> {
+ %0 = affine.min #map0()[%iv0]
+
+ // FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
+ // FILL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold
+ %1 = tensor.extract_slice %arg1[0, 0] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
+
+ // Check only the fill output operand is padded.
+ // FILL: %[[T6:.*]] = linalg.fill(%[[ARG0]], %[[T1]]
+ %2 = linalg.fill(%arg0, %1) : f32, tensor<4x?xf32> -> tensor<4x?xf32>
+ %3 = tensor.insert_slice %2 into %arg1[0, 0] [4, %0] [1, 1] : tensor<4x?xf32> into tensor<24x12xf32>
+ return %3 : tensor<24x12xf32>
}
// -----
#map0 = affine_map<()[s0] -> (7, s0)>
-// CHECK: static_extract_slice_missing
-// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<4x5xf32>,
+// MATMUL: static_extract_slice_missing
+// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<4x5xf32>,
func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<4x5xf32>,
@@ -380,10 +314,10 @@ func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
%2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
// Check the matmul inputs are padded despite the missing slice for the static output.
- // CHECK: %[[T0:.*]] = linalg.pad_tensor
- // CHECK: %[[T1:.*]] = linalg.pad_tensor
- // CHECK: = linalg.matmul ins(%[[T0]], %[[T1]]
- // CHECK-SAME: outs(%[[ARG2]]
+ // MATMUL: %[[T0:.*]] = linalg.pad_tensor
+ // MATMUL: %[[T1:.*]] = linalg.pad_tensor
+ // MATMUL: = linalg.matmul ins(%[[T0]], %[[T1]]
+ // MATMUL-SAME: outs(%[[ARG2]]
%3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32>
return %3 : tensor<4x5xf32>
}
@@ -392,24 +326,24 @@ func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
#map0 = affine_map<()[s0] -> (7, s0)>
-// CHECK: dynamic_extract_slice_missing
-// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<4x?xf32>,
-// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
-// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
+// MATMUL: dynamic_extract_slice_missing
+// MATMUL-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<4x?xf32>,
+// MATMUL-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32>,
+// MATMUL-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32>,
func @dynamic_extract_slice_missing(%arg0: tensor<4x?xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>,
%iv0 : index, %iv1 : index, %iv2 : index) -> tensor<24x25xf32> {
%0 = affine.min #map0()[%iv2]
- // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
- // CHECK: %[[T1:.*]] = tensor.extract_slice %[[ARG2]]
+ // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
+ // MATMUL: %[[T1:.*]] = tensor.extract_slice %[[ARG2]]
%2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
%3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
// Check the matmul is not padded due to the missing slice for the dynamic input.
- // CHECK: = linalg.matmul ins(%[[ARG0]], %[[T0]]
- // CHECK-SAME: outs(%[[T1]]
+ // MATMUL: = linalg.matmul ins(%[[ARG0]], %[[T0]]
+ // MATMUL-SAME: outs(%[[T1]]
%4 = linalg.matmul ins(%arg0, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
return %5 : tensor<24x25xf32>
More information about the Mlir-commits
mailing list