[Mlir-commits] [mlir] 97b351a - [mlir][gpu] Fix leaked stream and module when lowering gpu.launch_func to runtime calls.

Thu Oct 29 00:41:02 PDT 2020

Author: Christian Sigg
Date: 2020-10-29T08:40:51+01:00
New Revision: 97b351a827677ebbedc10bfbce8ef8844c246553

URL: https://github.com/llvm/llvm-project/commit/97b351a827677ebbedc10bfbce8ef8844c246553
DIFF: https://github.com/llvm/llvm-project/commit/97b351a827677ebbedc10bfbce8ef8844c246553.diff

LOG: [mlir][gpu] Fix leaked stream and module when lowering gpu.launch_func to runtime calls.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D90370

Added: 
    

Modified: 
    mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
    mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
    mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
    mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp

Removed: 
    


################################################################################
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
index ae112d516d5a..a8a416d7843d 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -88,6 +88,8 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
       "mgpuModuleLoad",
       llvmPointerType /* void *module */,
       {llvmPointerType /* void *cubin */}};
+  FunctionCallBuilder moduleUnloadCallBuilder = {
+      "mgpuModuleUnload", llvmVoidType, {llvmPointerType /* void *module */}};
   FunctionCallBuilder moduleGetFunctionCallBuilder = {
       "mgpuModuleGetFunction",
       llvmPointerType /* void *function */,
@@ -490,6 +492,8 @@ LogicalResult ConvertLaunchFuncOpToGpuRuntimeCallPattern::matchAndRewrite(
        kernelParams,                /* kernel params */
        nullpointer /* extra */});
   streamSynchronizeCallBuilder.create(loc, rewriter, stream.getResult(0));
+  streamDestroyCallBuilder.create(loc, rewriter, stream.getResult(0));
+  moduleUnloadCallBuilder.create(loc, rewriter, module.getResult(0));
 
   rewriter.eraseOp(op);
   return success();

diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
index 908604e091f5..c63936848e9a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -48,4 +48,6 @@ module attributes {gpu.container_module} {
   // CHECK-SAME: [[C8]], [[C8]], [[C8]], [[C0_I32]], [[STREAM]],
   // CHECK-SAME: [[PARAMS]], [[EXTRA_PARAMS]])
   // CHECK: llvm.call @mgpuStreamSynchronize
+  // CHECK: llvm.call @mgpuStreamDestroy
+  // CHECK: llvm.call @mgpuModuleUnload
 }

diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
index a32c37d96a42..55567001aeae 100644
--- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
@@ -47,6 +47,10 @@ extern "C" CUmodule mgpuModuleLoad(void *data) {
   return module;
 }
 
+extern "C" void mgpuModuleUnload(CUmodule module) {
+  CUDA_REPORT_IF_ERROR(cuModuleUnload(module));
+}
+
 extern "C" CUfunction mgpuModuleGetFunction(CUmodule module, const char *name) {
   CUfunction function = nullptr;
   CUDA_REPORT_IF_ERROR(cuModuleGetFunction(&function, module, name));

diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
index 999b80c21fbc..d600a3fd8fa2 100644
--- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
@@ -46,6 +46,10 @@ extern "C" hipModule_t mgpuModuleLoad(void *data) {
   return module;
 }
 
+extern "C" void mgpuModuleUnload(hipModule_t module) {
+  HIP_REPORT_IF_ERROR(hipModuleUnload(module));
+}
+
 extern "C" hipFunction_t mgpuModuleGetFunction(hipModule_t module,
                                                const char *name) {
   hipFunction_t function = nullptr;