[Mlir-commits] [mlir] 46c32af - [mlir] Enable folding memref alias for `ldmatrix`
Guray Ozen
llvmlistbot at llvm.org
Thu May 25 04:10:23 PDT 2023
Author: Guray Ozen
Date: 2023-05-25T13:10:17+02:00
New Revision: 46c32afbc5e126285f18fb93803ce79bd76230a9
URL: https://github.com/llvm/llvm-project/commit/46c32afbc5e126285f18fb93803ce79bd76230a9
DIFF: https://github.com/llvm/llvm-project/commit/46c32afbc5e126285f18fb93803ce79bd76230a9.diff
LOG: [mlir] Enable folding memref alias for `ldmatrix`
Folding mechanism does not recognize `ldmatrix` op. This work helps pass to recognize the op and fold the memref aliases.
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D151412
Added:
Modified:
mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
index 2c30e98dd1070..5916d6489cbc8 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
@@ -169,6 +169,10 @@ static Value getMemRefOperand(vector::TransferReadOp op) {
return op.getSource();
}
+static Value getMemRefOperand(nvgpu::LdMatrixOp op) {
+ return op.getSrcMemref();
+}
+
static Value getMemRefOperand(vector::TransferWriteOp op) {
return op.getSource();
}
@@ -406,6 +410,11 @@ LogicalResult LoadOpOfSubViewOpFolder<OpTy>::matchAndRewrite(
op, op.getType(), subViewOp.getSource(), sourceIndices,
op.getLeadDimension(), op.getTransposeAttr());
})
+ .Case([&](nvgpu::LdMatrixOp op) {
+ rewriter.replaceOpWithNewOp<nvgpu::LdMatrixOp>(
+ op, op.getType(), subViewOp.getSource(), sourceIndices,
+ op.getTranspose(), op.getNumTiles());
+ })
.Default([](Operation *) { llvm_unreachable("unexpected operation."); });
return success();
}
@@ -658,6 +667,7 @@ LogicalResult NvgpuAsyncCopyOpSubViewOpFolder::matchAndRewrite(
void memref::populateFoldMemRefAliasOpPatterns(RewritePatternSet &patterns) {
patterns.add<LoadOpOfSubViewOpFolder<affine::AffineLoadOp>,
LoadOpOfSubViewOpFolder<memref::LoadOp>,
+ LoadOpOfSubViewOpFolder<nvgpu::LdMatrixOp>,
LoadOpOfSubViewOpFolder<vector::TransferReadOp>,
LoadOpOfSubViewOpFolder<gpu::SubgroupMmaLoadMatrixOp>,
StoreOpOfSubViewOpFolder<affine::AffineStoreOp>,
diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
index 93e8a20e20ce7..0e9df2969023e 100644
--- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
+++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir
@@ -599,3 +599,25 @@ func.func @fold_src_fold_dest_nvgpu_device_async_copy(%gmem_memref_3d : memref<2
// CHECK-DAG: %[[RESOLVED_DST_IDX_1:.+]] = affine.apply #[[MAP]]()[%[[DEST_IDX_1]], %[[DEST_SUB_IDX_0]]]
// CHECK-DAG: %[[RESOLVED_DST_IDX_3:.+]] = affine.apply #[[MAP]]()[%[[DEST_IDX_3]], %[[DEST_SUB_IDX_1]]]
// CHECK-DAG: nvgpu.device_async_copy %[[GMEM_MEMREF_3d]][%[[RESOLVED_SRC_IDX_0]], %[[SRC_IDX_1]], %[[RESOLVED_SRC_IDX_1]]], %[[SMEM_MEMREF_4d]][%[[DEST_IDX_0]], %[[RESOLVED_DST_IDX_1]], %[[DEST_IDX_2]], %[[RESOLVED_DST_IDX_3]]], 8 {bypassL1} : memref<2x128x768xf16> to memref<5x1x64x64xf16, #gpu.address_space<workgroup>>
+
+// -----
+
+#map = affine_map<()[s0] -> (-s0 + 4)>
+#map1 = affine_map<()[s0] -> (-s0 + 32)>
+
+func.func @test_ldmatrix(%arg0: memref<4x32x32xf16, 3>, %arg1: index, %arg2: index, %arg3: index) -> vector<4x2xf16> {
+ %c0 = arith.constant 0 : index
+ %0 = affine.apply #map()[%arg1]
+ %1 = affine.apply #map1()[%arg2]
+ %2 = affine.apply #map1()[%arg3]
+ %subview = memref.subview %arg0[%arg1, %arg2, %arg3] [%0, %1, %2] [1, 1, 1] : memref<4x32x32xf16, 3> to memref<?x?x?xf16, strided<[1024, 32, 1], offset: ?>, 3>
+ %3 = nvgpu.ldmatrix %subview[%c0, %c0, %c0] {numTiles = 4 : i32, transpose = false} : memref<?x?x?xf16, strided<[1024, 32, 1], offset: ?>, 3> -> vector<4x2xf16>
+ return %3 : vector<4x2xf16>
+}
+
+// CHECK: func @test_ldmatrix
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<4x32x32xf16, 3>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index
+// CHECK: nvgpu.ldmatrix %[[ARG0]][%[[ARG1]], %[[ARG2]], %[[ARG3]]] {numTiles = 4 : i32, transpose = false} : memref<4x32x32xf16, 3> -> vector<4x2xf16>
More information about the Mlir-commits
mailing list