[Mlir-commits] [mlir] [mlir][vector] Implement lowering for 1D vector.deinterleave operations (PR #93042)

Thu May 23 06:42:50 PDT 2024

https://github.com/mub-at-arm updated https://github.com/llvm/llvm-project/pull/93042

>From 7dd81f4598702027bac5e4fd31f62c07a74f115a Mon Sep 17 00:00:00 2001
From: "Mubashar.Ahmad at arm.com" <mubashar.ahmad at arm.com>
Date: Thu, 16 May 2024 12:28:34 +0000
Subject: [PATCH 1/2] [mlir][VectorOps] Add deinterleave operation to vector
 dialect

The deinterleave operation constructs two vectors from a single input
vector. Each new vector is the collection of even and odd elements
from the input, respectively. This is essentially the inverse of an
interleave operation.

Each output's size is half of the input vector's trailing dimension
in the n-D case, or half of its only dimension in the 1-D case. The
operation cannot be applied to 0-D inputs or to vectors whose
(trailing) dimension has size 1.

The operation supports scalable vectors.

Example:
```mlir
%0 = vector.deinterleave %a
           : vector<[4]xi32> -> vector<[2]xi32>
%1 = vector.deinterleave %b
           : vector<8xi8> -> vector<4xi8>
%2 = vector.deinterleave %c
           : vector<2x8xf32> -> vector<2x4xf32>
%3 = vector.deinterleave %d
           : vector<2x4x[6]xf64> -> vector<2x4x[3]xf64>
```

>From 91b971b436bd3c242442a1a3ffea1e5df9cac468 Mon Sep 17 00:00:00 2001
From: "Mubashar.Ahmad at arm.com" <mubashar.ahmad at arm.com>
Date: Wed, 22 May 2024 09:01:18 +0000
Subject: [PATCH 2/2] [mlir][vector] Implement lowering for 1D
 vector.deinterleave operations

This patch implements the lowering of vector.deinterleave
for 1D vectors.

For fixed vector types, the operation is lowered to two
llvm shufflevector operations: one for the even-indexed
elements and the other for the odd-indexed elements. A poison
value is used as the second operand that shufflevector
requires.

For scalable vectors, the llvm vector.deinterleave2
intrinsic is used for lowering. The intrinsic returns its
two results packed in a struct, so each result is obtained
by extracting the corresponding struct element.
---
 .../VectorToLLVM/ConvertVectorToLLVM.cpp      | 64 ++++++++++++++++++-
 .../VectorToLLVM/vector-to-llvm.mlir          | 22 +++++++
 .../Vector/CPU/ArmSVE/test-deinterleave.mlir  | 18 ++++++
 .../Dialect/Vector/CPU/test-deinterleave.mlir | 18 ++++++
 4 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-deinterleave.mlir
 create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/test-deinterleave.mlir

diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index fe6bcc1c8b667..293d83369674b 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -1761,6 +1761,66 @@ struct VectorInterleaveOpLowering
   }
 };
 
+/// Conversion pattern for a 1-D `vector.deinterleave`.
+/// Scalable vectors lower to the `llvm.intr.vector.deinterleave2` intrinsic;
+/// fixed-size vectors lower to two `llvm.shufflevector` ops (even/odd lanes).
+struct VectorDeinterleaveOpLowering
+    : public ConvertOpToLLVMPattern<vector::DeinterleaveOp> {
+  using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(vector::DeinterleaveOp deinterleaveOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    VectorType resultType = deinterleaveOp.getResultVectorType();
+    VectorType sourceType = deinterleaveOp.getSourceVectorType();
+    auto loc = deinterleaveOp.getLoc();
+
+    // Only rank-1 results are handled here; anything else is a match failure.
+    if (resultType.getRank() != 1)
+      return rewriter.notifyMatchFailure(deinterleaveOp,
+                                         "deinterleaveOp not rank 1");
+
+    if (resultType.isScalable()) {
+      // The intrinsic returns both halves packed in a single struct value.
+      auto llvmTypeConverter = this->getTypeConverter();
+      auto deinterleaveResults = deinterleaveOp.getResultTypes();
+      auto packedOpResults =
+          llvmTypeConverter->packOperationResults(deinterleaveResults);
+      auto intrinsic = rewriter.create<LLVM::vector_deinterleave2>(
+          loc, packedOpResults, adaptor.getSource());
+
+      auto evenResult = rewriter.create<LLVM::ExtractValueOp>(
+          loc, intrinsic->getResult(0), 0);
+      auto oddResult = rewriter.create<LLVM::ExtractValueOp>(
+          loc, intrinsic->getResult(0), 1);
+
+      rewriter.replaceOp(deinterleaveOp, ValueRange{evenResult, oddResult});
+      return success();
+    }
+
+    // Fixed-size case: even source lanes go to result 0, odd lanes to result 1.
+    SmallVector<int32_t> evenShuffleMask;
+    SmallVector<int32_t> oddShuffleMask;
+    evenShuffleMask.reserve(resultType.getNumElements());
+    oddShuffleMask.reserve(resultType.getNumElements());
+    for (int64_t i = 0, e = sourceType.getNumElements(); i < e; ++i) {
+      auto &mask = (i % 2 == 0) ? evenShuffleMask : oddShuffleMask;
+      mask.push_back(static_cast<int32_t>(i));
+    }
+
+    // Poison of the converted source type fills the unused second operand.
+    auto source = adaptor.getSource();
+    auto poison = rewriter.create<LLVM::PoisonOp>(loc, source.getType());
+    auto evenShuffle = rewriter.create<LLVM::ShuffleVectorOp>(
+        loc, source, poison, evenShuffleMask);
+    auto oddShuffle = rewriter.create<LLVM::ShuffleVectorOp>(
+        loc, source, poison, oddShuffleMask);
+
+    rewriter.replaceOp(deinterleaveOp, ValueRange{evenShuffle, oddShuffle});
+    return success();
+  }
+};
+
 } // namespace
 
 /// Populate the given list with patterns that convert from Vector to LLVM.
@@ -1785,8 +1845,8 @@ void mlir::populateVectorToLLVMConversionPatterns(
                VectorExpandLoadOpConversion, VectorCompressStoreOpConversion,
                VectorSplatOpLowering, VectorSplatNdOpLowering,
                VectorScalableInsertOpLowering, VectorScalableExtractOpLowering,
-               MaskedReductionOpConversion, VectorInterleaveOpLowering>(
-      converter);
+               MaskedReductionOpConversion, VectorInterleaveOpLowering,
+               VectorDeinterleaveOpLowering>(converter);
   // Transfer ops with rank > 1 are handled by VectorToSCF.
   populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
 }
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 439f1e920e392..d1755f0cd3a21 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -2546,3 +2546,25 @@ func.func @vector_interleave_2d_scalable(%a: vector<2x[8]xi16>, %b: vector<2x[8]
   %0 = vector.interleave %a, %b : vector<2x[8]xi16>
   return %0 : vector<2x[16]xi16>
 }
+
+// -----
+
+// CHECK-LABEL: @vector_deinterleave_1d
+// CHECK-SAME:  (%{{.*}}: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>)
+func.func @vector_deinterleave_1d(%a: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) {
+  // CHECK: llvm.mlir.poison : vector<4xi32>
+  // CHECK: llvm.shufflevector %{{.*}}, %{{.*}} [0, 2] : vector<4xi32>
+  // CHECK: llvm.shufflevector %{{.*}}, %{{.*}} [1, 3] : vector<4xi32>
+  %0, %1 = vector.deinterleave %a : vector<4xi32> -> vector<2xi32>
+  return %0, %1 : vector<2xi32>, vector<2xi32>
+}
+
+// CHECK-LABEL: @vector_deinterleave_1d_scalable
+// CHECK-SAME:  (%{{.*}}: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>)
+func.func @vector_deinterleave_1d_scalable(%a: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) {
+  // CHECK: llvm.intr.vector.deinterleave2
+  // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
+  // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
+  %0, %1 = vector.deinterleave %a : vector<[4]xi32> -> vector<[2]xi32>
+  return %0, %1 : vector<[2]xi32>, vector<[2]xi32>
+}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-deinterleave.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-deinterleave.mlir
new file mode 100644
index 0000000000000..d8cd38ef33037
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-deinterleave.mlir
@@ -0,0 +1,18 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_c_runner_utils | \
+// RUN: FileCheck %s
+
+func.func @entry() {
+  %step_vector = llvm.intr.experimental.stepvector : vector<[4]xi8>
+  vector.print %step_vector : vector<[4]xi8>
+  // CHECK: ( 0, 1, 2, 3, 4, 5, 6, 7 )
+
+  %v1, %v2 = vector.deinterleave %step_vector : vector<[4]xi8> -> vector<[2]xi8>
+  vector.print %v1 : vector<[2]xi8>
+  vector.print %v2 : vector<[2]xi8>
+  // CHECK: ( 0, 2, 4, 6 )
+  // CHECK: ( 1, 3, 5, 7 )
+
+  return
+}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-deinterleave.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-deinterleave.mlir
new file mode 100644
index 0000000000000..4915a3cde124d
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-deinterleave.mlir
@@ -0,0 +1,18 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_c_runner_utils | \
+// RUN: FileCheck %s
+
+func.func @entry() {
+  %v0 = arith.constant dense<[1, 2, 3, 4]> : vector<4xi8>
+  vector.print %v0 : vector<4xi8>
+  // CHECK: ( 1, 2, 3, 4 )
+
+  %v1, %v2 = vector.deinterleave %v0 : vector<4xi8> -> vector<2xi8>
+  vector.print %v1 : vector<2xi8>
+  vector.print %v2 : vector<2xi8>
+  // CHECK: ( 1, 3 )
+  // CHECK: ( 2, 4 )
+
+  return
+}