[Mlir-commits] [mlir] 9f8f1d9 - [MLIR][AMDGPU] Add ability to do 16-bit Memset with HIP APIs (#108587)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Sep 20 07:53:44 PDT 2024


Author: Umang Yadav
Date: 2024-09-20T09:53:41-05:00
New Revision: 9f8f1d9890fcbae96b92fa0bee5a6f9e1f953ebd

URL: https://github.com/llvm/llvm-project/commit/9f8f1d9890fcbae96b92fa0bee5a6f9e1f953ebd
DIFF: https://github.com/llvm/llvm-project/commit/9f8f1d9890fcbae96b92fa0bee5a6f9e1f953ebd.diff

LOG: [MLIR][AMDGPU] Add ability to do 16-bit Memset with HIP APIs (#108587)

CC: @krzysz00 @manupak

Added: 
    

Modified: 
    mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
    mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
index 11cf6d7b077c0f..b984149ca6dea5 100644
--- a/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
@@ -126,6 +126,13 @@ extern "C" void mgpuMemset32(void *dst, int value, size_t count,
   HIP_REPORT_IF_ERROR(hipMemsetD32Async(reinterpret_cast<hipDeviceptr_t>(dst),
                                         value, count, stream));
 }
+
+extern "C" void mgpuMemset16(void *dst, int short value, size_t count,
+                             hipStream_t stream) {
+  HIP_REPORT_IF_ERROR(hipMemsetD16Async(reinterpret_cast<hipDeviceptr_t>(dst),
+                                        value, count, stream));
+}
+
 /// Helper functions for writing mlir example code
 
 // Allows to register byte array with the ROCM runtime. Helpful until we have

diff  --git a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
index aaced31813d574..3e5e499d82594a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
@@ -16,3 +16,20 @@ module attributes {gpu.container_module} {
     return
   }
 }
+
+module attributes {gpu.container_module} {
+
+  // CHECK: func @foo16
+  func.func @foo16(%dst : memref<7xf16, 1>, %value : f16) {
+    // CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
+    %t0 = gpu.wait async
+    // CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
+    // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
+    // CHECK: llvm.call @mgpuMemset16(%[[addr_cast]], %{{.*}}, %[[size_bytes]], %[[t0]])
+    %t1 = gpu.memset async [%t0] %dst, %value : memref<7xf16, 1>, f16
+    // CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
+    // CHECK: llvm.call @mgpuStreamDestroy(%[[t0]])
+    gpu.wait [%t1]
+    return
+  }
+}


        


More information about the Mlir-commits mailing list