[Mlir-commits] [mlir] ced9f4f - [MLIR] Modify lowering of gpu.alloc op to llvm (#69969)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Oct 25 12:00:51 PDT 2023
Author: Nishant Patel
Date: 2023-10-25T22:00:47+03:00
New Revision: ced9f4f0e808618bb75a3b133150ab3f9213e650
URL: https://github.com/llvm/llvm-project/commit/ced9f4f0e808618bb75a3b133150ab3f9213e650
DIFF: https://github.com/llvm/llvm-project/commit/ced9f4f0e808618bb75a3b133150ab3f9213e650.diff
LOG: [MLIR] Modify lowering of gpu.alloc op to llvm (#69969)
If gpu.alloc has no async dependency (in case gpu.alloc has a
hostShared allocation), create a new stream & synchronize. This PR is a
follow-up to #66401
Added:
Modified:
mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 097caf23edfa5dd..12bd02050be036c 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -836,7 +836,11 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
// Allocate the underlying buffer and store a pointer to it in the MemRef
// descriptor.
Type elementPtrType = this->getElementPtrType(memRefType);
- auto stream = adaptor.getAsyncDependencies().front();
+
+ auto nullPtr = rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPointerType);
+ Value stream = adaptor.getAsyncDependencies().empty()
+ ? nullPtr
+ : adaptor.getAsyncDependencies().front();
auto isHostShared = rewriter.create<mlir::LLVM::ConstantOp>(
loc, llvmInt8Type, rewriter.getI8IntegerAttr(isShared));
@@ -855,7 +859,12 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
auto memRefDescriptor = this->createMemRefDescriptor(
loc, memRefType, allocatedPtr, alignedPtr, shape, strides, rewriter);
- rewriter.replaceOp(allocOp, {memRefDescriptor, stream});
+ if (allocOp.getAsyncToken()) {
+ // Async alloc: make dependent ops use the same stream.
+ rewriter.replaceOp(allocOp, {memRefDescriptor, stream});
+ } else {
+ rewriter.replaceOp(allocOp, {memRefDescriptor});
+ }
return success();
}
diff --git a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
index f365dcb02daf4c2..70450656b9df64f 100644
--- a/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir
@@ -19,4 +19,22 @@ module attributes {gpu.container_module} {
gpu.wait [%3]
return
}
+
+ // CHECK-LABEL: llvm.func @alloc_sync
+ // CHECK-SAME: %[[size:.*]]: i64
+ func.func @alloc_sync(%size : index) {
+ // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]]
+ // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]]
+ // CHECK: %[[nullptr:.*]] = llvm.mlir.zero
+ // CHECK: %[[isHostShared:.*]] = llvm.mlir.constant
+ // CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[nullptr]], %[[isHostShared]])
+ %0 = gpu.alloc host_shared (%size) : memref<?xf32>
+ // CHECK: %[[stream:.*]] = llvm.call @mgpuStreamCreate()
+ %1 = gpu.wait async
+ %2 = gpu.dealloc async [%1] %0 : memref<?xf32>
+ // CHECK: llvm.call @mgpuStreamSynchronize(%[[stream]])
+ // CHECK: llvm.call @mgpuStreamDestroy(%[[stream]])
+ gpu.wait [%2]
+ return
+ }
}
More information about the Mlir-commits
mailing list