[Mlir-commits] [mlir] [mlir][ArmSME] Fold transpose into xfer read to enable in-flight transpose (PR #92562)
Cullen Rhodes
llvmlistbot at llvm.org
Fri May 17 08:07:12 PDT 2024
https://github.com/c-rhodes created https://github.com/llvm/llvm-project/pull/92562
A vector.transpose op whose input comes from a vector.transfer_read can be eliminated by folding the transpose into the transfer read's permutation map. This enables in-flight transposition when the transfer read is later converted to arm_sme.tile_load.
From 650e1e8637d3297e4d3929eac074b3c44251c915 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Fri, 17 May 2024 09:52:35 +0000
Subject: [PATCH] [mlir][ArmSME] Fold transpose into xfer read to enable in-flight transpose
vector.transpose ops whose inputs come from vector.transfer_read can be
eliminated by folding the transpose into the xfer op to enable in-flight
transposition when converting xfer read to arm_sme.tile_load.
---
.../Conversion/VectorToArmSME/VectorToArmSME.cpp | 16 ++++++++++++++--
.../VectorToArmSME/vector-to-arm-sme.mlir | 12 ++++++++++++
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp
index d8e473a562e53..b1b84705da7d3 100644
--- a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp
+++ b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp
@@ -356,6 +356,20 @@ struct TransposeOpToArmSMELowering
return failure();
auto loc = transposeOp.getLoc();
+ Value input = transposeOp.getVector();
+
+ if (auto xferOp = input.getDefiningOp<vector::TransferReadOp>()) {
+ // Fold transpose into transfer_read to enable in-flight transpose when
+ // converting to arm_sme.tile_load.
+ rewriter.modifyOpInPlace(xferOp, [&]() {
+ SmallVector<bool> inBounds(xferOp.getVectorType().getRank(), false);
+ xferOp->setAttr(xferOp.getPermutationMapAttrName(),
+ AffineMapAttr::get(AffineMap::getPermutationMap(
+ permutation, transposeOp.getContext())));
+ });
+ rewriter.replaceOp(transposeOp, xferOp);
+ return success();
+ }
// Allocate buffer to store input tile to.
Value vscale =
@@ -372,8 +386,6 @@ struct TransposeOpToArmSMELowering
auto buffer = rewriter.create<memref::AllocaOp>(
loc, bufferType, ValueRange{numTileSlices, numTileSlices});
- Value input = transposeOp.getVector();
-
// Store input tile.
auto tileStoreOp = rewriter.create<arm_sme::TileStoreOp>(
loc, input, buffer, ValueRange{c0, c0});
diff --git a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
index ce0b46e0f061a..48e92ce88ed16 100644
--- a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
+++ b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
@@ -150,6 +150,18 @@ func.func @transfer_read_2d_transpose_with_mask_f32(%src : memref<?x?xf32>, %mas
// -----
+// CHECK-LABEL: @fold_transpose_into_load
+// CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
+func.func @fold_transpose_into_load(%src : memref<?x?xf32>) {
+ %c0 = arith.constant 0 : index
+ %pad = arith.constant 0.0 : f32
+ %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
+ %1 = vector.transpose %0, [1, 0] : vector<[4]x[4]xf32> to vector<[4]x[4]xf32>
+ "prevent.dce"(%1) : (vector<[4]x[4]xf32>) -> ()
+}
+
+// -----
+
//===----------------------------------------------------------------------===//
// vector.transfer_write
//===----------------------------------------------------------------------===//
More information about the Mlir-commits mailing list