[Mlir-commits] [mlir] [mlir][vector] Set InBound for vector read write after peeling (PR #89108)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Wed Apr 17 10:36:43 PDT 2024


llvmbot wrote:



@llvm/pr-subscribers-mlir-vector

Author: None (ShivaChen)

Changes:

Set the in_bounds attribute on vector.transfer_read/write when the enclosing loop's iteration range is a multiple of the vector size.

In that case the last iteration still processes a full vector's worth of elements, so setting the attribute avoids generating a mask.

Such a loop can be produced by -scf-for-loop-peeling.
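
For illustration, here is a minimal sketch (not part of the patch) of the kind of loop this targets; the function name `@peeled` and the 1-D shapes are illustrative, but the affine map mirrors the test case added below:

```mlir
#map = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
func.func @peeled(%n: index, %a: memref<?xi32>, %b: memref<?xi32>) {
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %c64 = arith.constant 64 : index
  // Main-loop upper bound after peeling: %ub = %n - ((%n - 0) mod 64),
  // so the length of the iteration range [%c0, %ub) is a multiple of 64.
  %ub = affine.apply #map()[%c0, %n, %c64]
  scf.for %i = %c0 to %ub step %c64 {
    // %i advances in full steps of 64, so every iteration (including the
    // last) covers a complete vector<64xi32>; the new fold uses this to
    // mark the transfers in_bounds instead of generating a mask.
    %v = vector.transfer_read %a[%i], %c0_i32 : memref<?xi32>, vector<64xi32>
    vector.transfer_write %v, %b[%i] : vector<64xi32>, memref<?xi32>
  }
  return
}
```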

---
Full diff: https://github.com/llvm/llvm-project/pull/89108.diff


3 Files Affected:

- (modified) mlir/lib/Dialect/Vector/IR/CMakeLists.txt (+1) 
- (modified) mlir/lib/Dialect/Vector/IR/VectorOps.cpp (+54-1) 
- (modified) mlir/test/Dialect/Vector/vector-transforms.mlir (+23) 


``````````diff
diff --git a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt
index 204462ffd047c6..95d31b9d8639cc 100644
--- a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt
@@ -24,6 +24,7 @@ add_mlir_dialect_library(MLIRVectorDialect
   MLIRMaskingOpInterface
   MLIRMemRefDialect
   MLIRSideEffectInterfaces
+  MLIRSCFDialect
   MLIRTensorDialect
   MLIRValueBoundsOpInterface
   MLIRVectorInterfaces
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 3e6425879cc67f..b233ad9786ce14 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -13,11 +13,13 @@
 
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Arith/Utils/Utils.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
@@ -3975,6 +3977,57 @@ Type TransferReadOp::getExpectedMaskType() {
   return inferTransferOpMaskType(getVectorType(), getPermutationMap());
 }
 
+// Return true if the loop upper bound has the form
+// UB == s1 - ((s1 - LB) mod Step). Then the iteration range
+// UB - LB = (s1 - LB) - ((s1 - LB) mod Step) is a multiple of the step.
+// Such a loop can be produced by -scf-for-loop-peeling.
+static bool isLoopIterationAsMultipleOfStep(mlir::scf::ForOp forOp) {
+  OpBuilder b(forOp.getContext());
+
+  auto UBOp = forOp.getUpperBound().getDefiningOp();
+  if (!UBOp)
+    return false;
+  auto applyOp = dyn_cast<mlir::affine::AffineApplyOp>(UBOp);
+  if (!applyOp)
+    return false;
+
+  SmallVector<Value> mapOps(applyOp.getMapOperands().begin(),
+                            applyOp.getMapOperands().end());
+  if (mapOps.size() != 3)
+    return false;
+  if (mapOps[0] != forOp.getLowerBound())
+    return false;
+  if (mapOps[2] != forOp.getStep())
+    return false;
+
+  auto UBExpr = applyOp.getAffineMap().getResult(0);
+  auto LBExpr = b.getAffineSymbolExpr(0);
+  auto sym1 = b.getAffineSymbolExpr(1);
+  auto stepExpr = b.getAffineSymbolExpr(2);
+
+  return UBExpr == (sym1 - (sym1 - LBExpr) % stepExpr);
+}
+
+template <typename TransferOp>
+static bool isLoopIterationAsMultipleOfVectorSize(
+    TransferOp op, int64_t resultIdx, int64_t indicesIdx) {
+  Value index = op.getIndices()[indicesIdx];
+  auto forOp = dyn_cast<mlir::scf::ForOp>(op->getParentOp());
+  if (!forOp)
+    return false;
+  auto constantStep = forOp.getConstantStep();
+  if (!constantStep)
+    return false;
+  if (index != forOp.getInductionVar())
+    return false;
+  if (!isLoopIterationAsMultipleOfStep(forOp))
+    return false;
+
+  int64_t vectorSize = op.getVectorType().getDimSize(resultIdx);
+
+  return vectorSize == *constantStep;
+}
+
 template <typename TransferOp>
 static bool isInBounds(TransferOp op, int64_t resultIdx, int64_t indicesIdx) {
   // TODO: support more aggressive createOrFold on:
@@ -3984,7 +4037,7 @@ static bool isInBounds(TransferOp op, int64_t resultIdx, int64_t indicesIdx) {
   Value index = op.getIndices()[indicesIdx];
   std::optional<int64_t> cstOp = getConstantIntValue(index);
   if (!cstOp.has_value())
-    return false;
+    return isLoopIterationAsMultipleOfVectorSize(op, resultIdx, indicesIdx);
 
   int64_t sourceSize = op.getShapedType().getDimSize(indicesIdx);
   int64_t vectorSize = op.getVectorType().getDimSize(resultIdx);
diff --git a/mlir/test/Dialect/Vector/vector-transforms.mlir b/mlir/test/Dialect/Vector/vector-transforms.mlir
index eda6a5cc40d999..7a31b010b88337 100644
--- a/mlir/test/Dialect/Vector/vector-transforms.mlir
+++ b/mlir/test/Dialect/Vector/vector-transforms.mlir
@@ -442,3 +442,26 @@ func.func @vec_0D(%arg0: vector<f32>) -> vector<i32> {
   %0 = vector.bitcast %arg0 : vector<f32> to vector<i32>
   return %0 : vector<i32>
 }
+
+// CHECK-LABEL: func @set_inbound
+//       CHECK:   scf.for
+//       CHECK:   vector.transfer_read  %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true]}
+//       CHECK:   vector.transfer_write %{{.*}}[%{{.*}}, %{{.*}}] {in_bounds = [true]}
+//       CHECK:   }
+//       CHECK:   return
+#map = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
+func.func @set_inbound(%arg0: index, %a : memref<100x100xi32>, %b : memref<100x100xi32>) {
+  %c0_i32 = arith.constant 0 : i32
+  %c64 = arith.constant 64 : index
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  scf.for %arg1 = %c0 to %arg0 step %c1 {
+    %2 = affine.apply #map()[%c0, %arg0, %c64]
+    scf.for %arg2 = %c0 to %2 step %c64 {
+      %3 = vector.transfer_read %a[%arg1, %arg2], %c0_i32 : memref<100x100xi32>, vector<64xi32>
+      %4 = arith.addi %3, %3 : vector<64xi32>
+      vector.transfer_write %4, %b[%arg1, %arg2] : vector<64xi32>, memref<100x100xi32>
+    }
+  }
+  return
+}

``````````
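
As a quick numeric check of the upper-bound pattern matched by `isLoopIterationAsMultipleOfStep` (illustrative numbers, not taken from the patch): with LB = 0, s1 = 100 and Step = 64, UB = 100 - ((100 - 0) mod 64) = 100 - 36 = 64, so UB - LB = 64 is a multiple of the step and the main loop's last iteration still covers a full vector of 64 elements.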



https://github.com/llvm/llvm-project/pull/89108

