[Mlir-commits] [mlir] [mlir][scf] Rewrite vector.transfer_read/write after peeling (PR #88684)
llvmlistbot at llvm.org
Sun Apr 14 23:04:08 PDT 2024
https://github.com/ShivaChen created https://github.com/llvm/llvm-project/pull/88684
After peeling, the trip count of the main loop is a multiple of the step. If the vector size of vector.transfer_read/write equals the step in the peeled loop, there is no remaining iteration smaller than the vector size.
In this case, rewriting vector.transfer_read/write to vector.load/store avoids generating masks when lowering to the LLVM dialect.
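As a sketch (this mirrors the test case added by the patch; %new_ub stands for the upper bound computed by peeling and %pad is the transfer_read padding value), the main loop

  scf.for %i = %c0 to %new_ub step %c64 {
    %val = vector.transfer_read %b[%i], %pad : memref<100xi32>, vector<64xi32>
    vector.transfer_write %val, %a[%i] : vector<64xi32>, memref<100xi32>
  }

becomes

  scf.for %i = %c0 to %new_ub step %c64 {
    %val = vector.load %b[%i] : memref<100xi32>, vector<64xi32>
    vector.store %val, %a[%i] : memref<100xi32>, vector<64xi32>
  }

while the remainder loop from %new_ub to the original upper bound keeps the transfer ops.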
From a37d9207ad9b286dba461f1aa53209d3cd31545e Mon Sep 17 00:00:00 2001
From: Shiva Chen <shiva.chen at imgtec.com>
Date: Wed, 3 Apr 2024 08:02:03 +0100
Subject: [PATCH] [mlir][scf] Rewrite vector.transfer_read/write after peeling
After peeling, the trip count of the main loop is a multiple of the step.
If the vector size of vector.transfer_read/write equals the step in the
peeled loop, there is no remaining iteration smaller than the vector
size.
In this case, rewriting vector.transfer_read/write to vector.load/store
avoids generating masks when lowering to the LLVM dialect.
---
.../SCF/Transforms/LoopSpecialization.cpp | 49 +++++++++++++++++++
.../for-loop-peeling-vector-load-store.mlir | 30 ++++++++++++
2 files changed, 79 insertions(+)
create mode 100644 mlir/test/Dialect/SCF/for-loop-peeling-vector-load-store.mlir
diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
index a30e349d49136c..4eb757d618a98d 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -20,6 +20,7 @@
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/PatternMatch.h"
@@ -166,6 +167,52 @@ static LogicalResult peelForLoop(RewriterBase &b, ForOp forOp,
return success();
}
+/// Rewrite a vector.transfer_read/write op into an unmasked vector.load/store.
+static void rewriteVectorReadWriteToLoadStore(RewriterBase &b, Operation *op) {
+  b.setInsertionPoint(op);
+  if (auto write = dyn_cast<vector::TransferWriteOp>(op)) {
+    b.replaceOpWithNewOp<vector::StoreOp>(
+        op, write.getVector(), write.getSource(), write.getIndices());
+  } else if (auto read = dyn_cast<vector::TransferReadOp>(op)) {
+    b.replaceOpWithNewOp<vector::LoadOp>(op, read.getVectorType(),
+                                         read.getSource(), read.getIndices());
+  }
+}
+
+/// Return true if `op` is a 1-D vector.transfer_read/write whose vector size
+/// equals the loop step.
+static bool hasVectorSizeEqualToStep(Operation *op,
+                                     std::optional<int64_t> step) {
+  if (!step)
+    return false;
+
+  if (isa<vector::TransferWriteOp, vector::TransferReadOp>(op)) {
+    auto vectorType = isa<vector::TransferWriteOp>(op)
+                          ? cast<vector::TransferWriteOp>(op).getVectorType()
+                          : cast<vector::TransferReadOp>(op).getVectorType();
+
+    if (vectorType.getRank() != 1)
+      return false;
+
+    int64_t vectorSize = vectorType.getShape()[0];
+    if (vectorSize == *step)
+      return true;
+  }
+
+  return false;
+}
+
+/// In the main loop produced by peeling, rewrite vector.transfer_read/write
+/// ops whose vector size equals the loop step into vector.load/store.
+static void rewriteVectorizedLoopAfterPeeling(RewriterBase &rewriter,
+                                              ForOp forOp) {
+  auto stepInt = getConstantIntValue(forOp.getStep());
+
+  forOp.walk([&](Operation *op) {
+    if (!isa<vector::TransferWriteOp, vector::TransferReadOp>(op))
+      return WalkResult::advance();
+    if (!hasVectorSizeEqualToStep(op, stepInt))
+      return WalkResult::advance();
+    rewriteVectorReadWriteToLoadStore(rewriter, op);
+    return WalkResult::advance();
+  });
+}
+
static void rewriteAffineOpAfterPeeling(RewriterBase &rewriter, ForOp forOp,
ForOp partialIteration,
Value previousUb) {
@@ -200,6 +247,8 @@ LogicalResult mlir::scf::peelForLoopAndSimplifyBounds(RewriterBase &rewriter,
if (failed(peelForLoop(rewriter, forOp, partialIteration, splitBound)))
return failure();
+ rewriteVectorizedLoopAfterPeeling(rewriter, forOp);
+
// Rewrite affine.min and affine.max ops.
rewriteAffineOpAfterPeeling(rewriter, forOp, partialIteration, previousUb);
diff --git a/mlir/test/Dialect/SCF/for-loop-peeling-vector-load-store.mlir b/mlir/test/Dialect/SCF/for-loop-peeling-vector-load-store.mlir
new file mode 100644
index 00000000000000..04991930a2c262
--- /dev/null
+++ b/mlir/test/Dialect/SCF/for-loop-peeling-vector-load-store.mlir
@@ -0,0 +1,30 @@
+// RUN: mlir-opt %s -scf-for-loop-peeling -canonicalize -verify-diagnostics | FileCheck %s
+
+func.func @vector_read_write(%a : memref<100xi32>, %b : memref<100xi32>, %ub: index) {
+// The trip count from %LB to %NEW_UB is a multiple of STEP after peeling,
+// so vector.transfer_read/write can be rewritten to vector.load/store to
+// avoid generating masks when lowering to LLVM.
+//
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 64) * 64)>
+// CHECK: func @vector_read_write(
+// CHECK-SAME: %[[A:.*]]: memref<100xi32>, %[[B:.*]]: memref<100xi32>, %[[UB:.*]]: index
+// CHECK: %[[LB:.*]] = arith.constant 0 : index
+// CHECK: %[[STEP:.*]] = arith.constant 64 : index
+// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]
+// CHECK: scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]] step %[[STEP]] {
+// CHECK: %[[VAL:.*]] = vector.load %[[B]][%[[IV]]]
+// CHECK: vector.store %[[VAL]], %[[A]][%[[IV]]]
+// CHECK: }
+// CHECK: scf.for %[[IV:.*]] = %[[NEW_UB]] to %[[UB]] step %[[STEP]] {
+// CHECK: %[[VAL:.*]] = vector.transfer_read %[[B]][%[[IV]]]
+// CHECK: vector.transfer_write %[[VAL]], %[[A]][%[[IV]]]
+// CHECK: }
+ %c0 = arith.constant 0 : index
+ %c64 = arith.constant 64 : index
+ %pad = arith.constant 0 : i32
+ scf.for %i = %c0 to %ub step %c64 {
+ %val = vector.transfer_read %b[%i], %pad : memref<100xi32>, vector<64xi32>
+ vector.transfer_write %val, %a[%i] : vector<64xi32>, memref<100xi32>
+ }
+ return
+}