[Mlir-commits] [mlir] [mlir] Drop outermost dims in slice rank reduction inference (PR #95020)

Mon Jun 10 11:21:01 PDT 2024

https://github.com/Max191 created https://github.com/llvm/llvm-project/pull/95020

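The `getDroppedDims` utility function does not follow the convention of dropping the outermost unit dimensions first when inferring a rank-reduction mask for a slice. This PR updates the implementation to match that convention: the sizes are now walked from innermost to outermost, so inner unit dimensions are matched against the reduced shape first and any leftover outer unit dimensions are the ones marked as dropped.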

>From cc3206c861f7d0459a5731ef82d5b78c6c40f189 Mon Sep 17 00:00:00 2001
From: Max Dawkins <max.dawkins at gmail.com>
Date: Mon, 10 Jun 2024 13:42:20 -0400
Subject: [PATCH 1/3] [mlir] Drop outermost dims in slice rank reduction
 inference

---
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 7fc29ec0139c2..72b1eebd8d08f 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -131,40 +131,40 @@ bool tensor::isSameTypeWithoutEncoding(Type tp1, Type tp2) {
 static llvm::SmallBitVector getDroppedDims(ArrayRef<int64_t> reducedShape,
                                            ArrayRef<OpFoldResult> mixedSizes) {
   llvm::SmallBitVector droppedDims(mixedSizes.size());
-  int64_t shapePos = 0;
+  int64_t shapePos = reducedShape.size() - 1;
 
-  for (const auto &size : enumerate(mixedSizes)) {
+  for (const auto &size : enumerate(llvm::reverse(mixedSizes))) {
+    size_t idx = mixedSizes.size() - size.index() - 1;
     // Rank-reduced dims must have a static unit dimension.
     bool isStaticUnitSize =
         size.value().is<Attribute>() &&
         llvm::cast<IntegerAttr>(size.value().get<Attribute>()).getInt() == 1;
 
-    if (shapePos == static_cast<int64_t>(reducedShape.size())) {
+    if (shapePos < 0) {
       // There are no more dims in the reduced shape. All remaining sizes must
       // be rank-reduced dims.
       assert(isStaticUnitSize && "expected unit dim");
-      droppedDims.set(size.index());
+      droppedDims.set(idx);
       continue;
     }
 
     // Dim is preserved if the size is not a static 1.
     if (!isStaticUnitSize) {
-      ++shapePos;
+      --shapePos;
       continue;
     }
 
     // Dim is preserved if the reduced shape dim is also 1.
     if (reducedShape[shapePos] == 1) {
-      ++shapePos;
+      --shapePos;
       continue;
     }
 
     // Otherwise: Dim is dropped.
-    droppedDims.set(size.index());
+    droppedDims.set(idx);
   }
 
-  assert(shapePos == static_cast<int64_t>(reducedShape.size()) &&
-         "dimension mismatch");
+  assert(shapePos < 0 && "dimension mismatch");
   return droppedDims;
 }
 

>From 29ac9c7632c89b1be82b76f4a117ba892f6c636d Mon Sep 17 00:00:00 2001
From: Max Dawkins <max.dawkins at gmail.com>
Date: Mon, 10 Jun 2024 14:03:55 -0400
Subject: [PATCH 2/3] remove ambiguous test

---
 .../Dialect/Tensor/fold-tensor-subset-ops.mlir  | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
index f2e529b4cac95..e41739cd4e7f4 100644
--- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
+++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
@@ -282,23 +282,6 @@ func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1x1xf32>, %
 
 // -----
 
-// CHECK-LABEL: func @insert_slice_of_insert_slice(
-//  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
-//  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
-//  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
-//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][5, %[[pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
-func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1xf32>, %r1: tensor<1x14xf32>, %pos: index)
-    -> tensor<1x14xf32> 
-{
-  %0 = tensor.insert_slice %t into %r0[2] [1] [1] 
-    : tensor<f32> into tensor<1xf32>
-  %1 = tensor.insert_slice %0 into %r1[3, %pos] [1, 1] [1, 1] 
-    : tensor<1xf32> into tensor<1x14xf32>
-  return %1 : tensor<1x14xf32>
-}
-
-// -----
-
 // This test fails to fold because the size `4` and `%pos` do not match: 
 // this requires a copy
 // CHECK-LABEL: func @fail_insert_slice_of_insert_slice(

>From 036df374d2cd70fe298575bbb7793a46fa72faef Mon Sep 17 00:00:00 2001
From: Max Dawkins <max.dawkins at gmail.com>
Date: Mon, 10 Jun 2024 14:18:58 -0400
Subject: [PATCH 3/3] add test

---
 ...fold-tensor-subset-ops-into-vector-transfers.mlir | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir
index e562cf8efc356..c2f21683d0cd6 100644
--- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir
+++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir
@@ -102,6 +102,18 @@ func.func @insert_slice_of_transfer_write(%t1 : tensor<?x12xf32>, %v : vector<5x
   return %1 : tensor<?x12xf32>
 }
 
+// CHECK-LABEL: func @unit_insert_slice_of_unit_transfer_write(
+//  CHECK-SAME:     %[[t1:.*]]: tensor<1x1x12xf32>, %[[v:.*]]: vector<1x6xf32>, %[[s:.*]]: index
+//       CHECK:   %[[c0:.*]] = arith.constant 0 : index
+//       CHECK:   %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c0]], %[[c0]], %[[s]]] {in_bounds = [true, true]} : vector<1x6xf32>, tensor<1x1x12xf32>
+//       CHECK:   return %[[r]]
+func.func @unit_insert_slice_of_unit_transfer_write(%t1 : tensor<1x1x12xf32>, %v : vector<1x6xf32>, %s : index, %t2 : tensor<1x6xf32>) -> tensor<1x1x12xf32> {
+  %c0 = arith.constant 0 : index
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<1x6xf32>, tensor<1x6xf32>
+  %1 = tensor.insert_slice %0 into %t1[0, 0, %s] [1, 1, 6] [1, 1, 1] : tensor<1x6xf32> into tensor<1x1x12xf32>
+  return %1 : tensor<1x1x12xf32>
+}
+
 // CHECK-LABEL: func @insert_slice_of_transfer_write_non_leading_rank_reduction(
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?x?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
 //   CHECK-DAG:   %[[c3:.*]] = arith.constant 3 : index