[Mlir-commits] [mlir] 9f8f1d9 - [MLIR][AMDGPU] Add ability to do 16-bit Memset with HIP APIs (#108587)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Sep 20 07:53:44 PDT 2024
Author: Umang Yadav
Date: 2024-09-20T09:53:41-05:00
New Revision: 9f8f1d9890fcbae96b92fa0bee5a6f9e1f953ebd
URL: https://github.com/llvm/llvm-project/commit/9f8f1d9890fcbae96b92fa0bee5a6f9e1f953ebd
DIFF: https://github.com/llvm/llvm-project/commit/9f8f1d9890fcbae96b92fa0bee5a6f9e1f953ebd.diff
LOG: [MLIR][AMDGPU] Add ability to do 16-bit Memset with HIP APIs (#108587)
CC: @krzysz00 @manupak
Added:
Modified:
mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
Removed:
################################################################################
diff --git a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
index 11cf6d7b077c0f..b984149ca6dea5 100644
--- a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
@@ -126,6 +126,13 @@ extern "C" void mgpuMemset32(void *dst, int value, size_t count,
HIP_REPORT_IF_ERROR(hipMemsetD32Async(reinterpret_cast<hipDeviceptr_t>(dst),
value, count, stream));
}
+
+extern "C" void mgpuMemset16(void *dst, int short value, size_t count, // 16-bit sibling of mgpuMemset32; `int short` is the same type as `short`
+ hipStream_t stream) { // the fill is issued on `stream` through the Async HIP entry point below
+ HIP_REPORT_IF_ERROR(hipMemsetD16Async(reinterpret_cast<hipDeviceptr_t>(dst),
+ value, count, stream)); // NOTE(review): presumably `count` is a number of 16-bit values, not bytes -- confirm against the HIP API docs
+}
+
/// Helper functions for writing mlir example code
// Allows to register byte array with the ROCM runtime. Helpful until we have
diff --git a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
index aaced31813d574..3e5e499d82594a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
@@ -16,3 +16,20 @@ module attributes {gpu.container_module} {
return
}
}
+
+module attributes {gpu.container_module} {
+
+ // CHECK: func @foo16
+ func.func @foo16(%dst : memref<7xf16, 1>, %value : f16) {
+ // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
+ %t0 = gpu.wait async // produces the async token that the memset below depends on
+ // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
+ // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
+ // CHECK: llvm.call @mgpuMemset16(%[[addr_cast]], %{{.*}}, %[[size_bytes]], %[[t0]])
+ %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf16, 1>, f16 // f16 fill value: the CHECK above expects the 16-bit runtime entry point
+ // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
+ // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
+ gpu.wait [%t1] // blocks on the memset's token; the CHECKs above expect a stream synchronize followed by destroy
+ return
+ }
+}
More information about the Mlir-commits
mailing list