[Mlir-commits] [mlir] [mlir][vector] Fix FlattenGather for scalable vectors (PR #96074)

Wed Jun 19 06:56:52 PDT 2024

https://github.com/c-rhodes created https://github.com/llvm/llvm-project/pull/96074

This pattern flattens vector.gather ops by unrolling the outermost
dimension for vectors of rank >= 2. There are two issues with this
pattern for scalable vectors:

  1. The unrolling doesn't take vscale into account. A constraint is
     added to disable this pattern for vectors with leading scalable
     dims.
  2. The scalable dims are dropped when creating the new gather. Fixed
     by propagating the flags.

Depends on #96049.

>From 1764144757ded52688a7ebd2e49eefeb7d6f294e Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Wed, 19 Jun 2024 09:22:14 +0000
Subject: [PATCH 1/2] [mlir][vector] Disable Gather1DToConditionalLoads for
 scalable vectors

This pattern scalarizes vector.gather operations, which is incorrect
for scalable vectors.
---
 .../Dialect/Vector/Transforms/LowerVectorGather.cpp    |  3 +++
 mlir/test/Dialect/Vector/vector-gather-lowering.mlir   | 10 ++++++++++
 2 files changed, 13 insertions(+)

diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
index 90128126d0fa1..dd027d107d16a 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
@@ -189,6 +189,9 @@ struct Gather1DToConditionalLoads : OpRewritePattern<vector::GatherOp> {
     if (resultTy.getRank() != 1)
       return rewriter.notifyMatchFailure(op, "unsupported rank");
 
+    if (resultTy.isScalable())
+      return rewriter.notifyMatchFailure(op, "not a fixed-width vector");
+
     Location loc = op.getLoc();
     Type elemTy = resultTy.getElementType();
     // Vector type with a single element. Used to generate `vector.loads`.
diff --git a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
index d047ac629d87e..c2eb88afa4dbf 100644
--- a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
+++ b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
@@ -206,3 +206,13 @@ func.func @strided_gather(%base : memref<100x3xf32>,
 // CHECK:           scf.if %[[MASK_3]] -> (vector<4xf32>)
 // CHECK:             %[[M_3:.*]] = vector.load %[[COLLAPSED]][%[[IDX_3]]] : memref<300xf32>, vector<1xf32>
 // CHECK:             %[[V_3:.*]] = vector.extract %[[M_3]][0] : f32 from vector<1xf32>
+
+// CHECK-LABEL: @scalable_gather_1d
+// CHECK-NOT: extract
+// CHECK: vector.gather
+// CHECK-NOT: extract
+func.func @scalable_gather_1d(%base: tensor<?xf32>, %v: vector<[2]xindex>, %mask: vector<[2]xi1>, %pass_thru: vector<[2]xf32>) -> vector<[2]xf32> {
+  %c0 = arith.constant 0 : index
+  %0 = vector.gather %base[%c0][%v], %mask, %pass_thru : tensor<?xf32>, vector<[2]xindex>, vector<[2]xi1>, vector<[2]xf32> into vector<[2]xf32>
+  return %0 : vector<[2]xf32>
+}

>From 9ee83cc9126f58684b1a8b8263239447ef9174de Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Wed, 19 Jun 2024 13:41:06 +0000
Subject: [PATCH 2/2] [mlir][vector] Fix FlattenGather for scalable vectors

This pattern flattens vector.gather ops by unrolling the outermost
dimension for vectors of rank >= 2. There are two issues with this
pattern for scalable vectors:

  1. The unrolling doesn't take vscale into account. A constraint is
     added to disable this pattern for vectors with leading scalable
     dims.
  2. The scalable dims are dropped when creating the new gather. Fixed
     by propagating the flags.

Depends on #96049.
---
 .../mlir/Dialect/Vector/Utils/VectorUtils.h   |  5 ++++
 .../Vector/Transforms/LowerVectorGather.cpp   | 11 +++++++-
 .../Transforms/LowerVectorShapeCast.cpp       |  5 ----
 mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp |  5 ++++
 .../Vector/vector-gather-lowering.mlir        | 26 +++++++++++++++++++
 5 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
index 9c83acc76e77a..923436865c176 100644
--- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
+++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
@@ -211,6 +211,11 @@ Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source,
 ///      static sizes in `shape`.
 LogicalResult isValidMaskedInputVector(ArrayRef<int64_t> shape,
                                        ArrayRef<int64_t> inputVectorSizes);
+
+/// Returns true if the leading dim(s) of `type` are fixed and the trailing dim
+/// is scalable.
+bool isTrailingDimScalable(VectorType type);
+
 } // namespace vector
 
 /// Constructs a permutation map of invariant memref indices to vector
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
index dd027d107d16a..1bafe926e16d7 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
@@ -55,6 +55,8 @@ namespace {
 /// ```
 ///
 /// When applied exhaustively, this will produce a sequence of 1-d gather ops.
+///
+/// Supports vector types with trailing scalable dim.
 struct FlattenGather : OpRewritePattern<vector::GatherOp> {
   using OpRewritePattern::OpRewritePattern;
 
@@ -64,6 +66,12 @@ struct FlattenGather : OpRewritePattern<vector::GatherOp> {
     if (resultTy.getRank() < 2)
       return rewriter.notifyMatchFailure(op, "already flat");
 
+    // Unrolling doesn't take vscale into account. Pattern is disabled for
+    // vectors with leading scalable dim(s).
+    if (resultTy.isScalable() && !isTrailingDimScalable(resultTy))
+      return rewriter.notifyMatchFailure(
+          op, "vector type must be fixed-width or scalable in trailing dim");
+
     Location loc = op.getLoc();
     Value indexVec = op.getIndexVec();
     Value maskVec = op.getMask();
@@ -73,7 +81,8 @@ struct FlattenGather : OpRewritePattern<vector::GatherOp> {
         loc, resultTy, rewriter.getZeroAttr(resultTy));
 
     Type subTy = VectorType::get(resultTy.getShape().drop_front(),
-                                 resultTy.getElementType());
+                                 resultTy.getElementType(),
+                                 resultTy.getScalableDims().drop_front());
 
     for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) {
       int64_t thisIdx[1] = {i};
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp
index 85c4c03d383f7..40be9b3a87c81 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp
@@ -342,11 +342,6 @@ class ScalableShapeCastOpRewritePattern
     rewriter.replaceOp(op, result);
     return success();
   }
-
-  static bool isTrailingDimScalable(VectorType type) {
-    return type.getRank() >= 1 && type.getScalableDims().back() &&
-           !llvm::is_contained(type.getScalableDims().drop_back(), true);
-  }
 };
 
 } // namespace
diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index 4ed5a8bac20d1..6a979d669f1a6 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -396,3 +396,8 @@ vector::isValidMaskedInputVector(ArrayRef<int64_t> shape,
   }
   return success();
 }
+
+bool vector::isTrailingDimScalable(VectorType type) {
+  return type.getRank() >= 1 && type.getScalableDims().back() &&
+         !llvm::is_contained(type.getScalableDims().drop_back(), true);
+}
diff --git a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
index c2eb88afa4dbf..ff1a92a65c42d 100644
--- a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
+++ b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
@@ -74,6 +74,32 @@ func.func @gather_memref_1d_i32_index(%base: memref<?xf32>, %v: vector<2xi32>, %
   return %0 : vector<2x3xf32>
  }
 
+// CHECK-LABEL: @scalable_gather_memref_2d
+// CHECK-SAME:      %[[BASE:.*]]: memref<?x?xf32>,
+// CHECK-SAME:      %[[IDXVEC:.*]]: vector<2x[3]xindex>,
+// CHECK-SAME:      %[[MASK:.*]]: vector<2x[3]xi1>,
+// CHECK-SAME:      %[[PASS:.*]]: vector<2x[3]xf32>
+// CHECK:         %[[C0:.*]] = arith.constant 0 : index
+// CHECK:         %[[C1:.*]] = arith.constant 1 : index
+// CHECK:         %[[INIT:.*]] = arith.constant dense<0.000000e+00> : vector<2x[3]xf32>
+// CHECK:         %[[IDXVEC0:.*]] = vector.extract %[[IDXVEC]][0] : vector<[3]xindex> from vector<2x[3]xindex>
+// CHECK:         %[[MASK0:.*]] = vector.extract %[[MASK]][0] : vector<[3]xi1> from vector<2x[3]xi1>
+// CHECK:         %[[PASS0:.*]] = vector.extract %[[PASS]][0] : vector<[3]xf32> from vector<2x[3]xf32>
+// CHECK:         %[[GATHER0:.*]] = vector.gather %[[BASE]]{{\[}}%[[C0]], %[[C1]]] {{\[}}%[[IDXVEC0]]], %[[MASK0]], %[[PASS0]] : memref<?x?xf32>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32>
+// CHECK:         %[[INS0:.*]] = vector.insert %[[GATHER0]], %[[INIT]] [0] : vector<[3]xf32> into vector<2x[3]xf32>
+// CHECK:         %[[IDXVEC1:.*]] = vector.extract %[[IDXVEC]][1] : vector<[3]xindex> from vector<2x[3]xindex>
+// CHECK:         %[[MASK1:.*]] = vector.extract %[[MASK]][1] : vector<[3]xi1> from vector<2x[3]xi1>
+// CHECK:         %[[PASS1:.*]] = vector.extract %[[PASS]][1] : vector<[3]xf32> from vector<2x[3]xf32>
+// CHECK:         %[[GATHER1:.*]] = vector.gather %[[BASE]]{{\[}}%[[C0]], %[[C1]]] {{\[}}%[[IDXVEC1]]], %[[MASK1]], %[[PASS1]] : memref<?x?xf32>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32>
+// CHECK:         %[[INS1:.*]] = vector.insert %[[GATHER1]], %[[INS0]] [1] : vector<[3]xf32> into vector<2x[3]xf32>
+// CHECK-NEXT:    return %[[INS1]] : vector<2x[3]xf32>
+func.func @scalable_gather_memref_2d(%base: memref<?x?xf32>, %v: vector<2x[3]xindex>, %mask: vector<2x[3]xi1>, %pass_thru: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = vector.gather %base[%c0, %c1][%v], %mask, %pass_thru : memref<?x?xf32>, vector<2x[3]xindex>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
+ return %0 : vector<2x[3]xf32>
+}
+
 // CHECK-LABEL: @gather_tensor_1d
 // CHECK-SAME:    ([[BASE:%.+]]: tensor<?xf32>, [[IDXVEC:%.+]]: vector<2xindex>, [[MASK:%.+]]: vector<2xi1>, [[PASS:%.+]]: vector<2xf32>)
 // CHECK-DAG:     [[M0:%.+]]    = vector.extract [[MASK]][0] : i1 from vector<2xi1>