[Openmp-commits] [openmp] 097f426 - [OpenMP][libomptarget] Fix deinit of NextGen AMDGPU plugin

Kevin Sala via Openmp-commits openmp-commits at lists.llvm.org
Fri Jan 20 04:18:26 PST 2023


Author: Kevin Sala
Date: 2023-01-20T13:17:32+01:00
New Revision: 097f42602d83926a9d5e7fbe3d0bb8ef5c733183

URL: https://github.com/llvm/llvm-project/commit/097f42602d83926a9d5e7fbe3d0bb8ef5c733183
DIFF: https://github.com/llvm/llvm-project/commit/097f42602d83926a9d5e7fbe3d0bb8ef5c733183.diff

LOG: [OpenMP][libomptarget] Fix deinit of NextGen AMDGPU plugin

This patch fixes a segfault that was appearing when the plugin fails to
initialize and then is deinitialized. Also, do not call hsa_shut_down if
the hsa_init failed.

Differential Revision: https://reviews.llvm.org/D142145

Added: 
    

Modified: 
    openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 14efe841ee99b..7dfef02d89296 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2245,7 +2245,9 @@ struct AMDGPUGlobalHandlerTy final : public GenericGlobalHandlerTy {
 /// Class implementing the AMDGPU-specific functionalities of the plugin.
 struct AMDGPUPluginTy final : public GenericPluginTy {
   /// Create an AMDGPU plugin and initialize the AMDGPU driver.
-  AMDGPUPluginTy() : GenericPluginTy(getTripleArch()), HostDevice(nullptr) {}
+  AMDGPUPluginTy()
+      : GenericPluginTy(getTripleArch()), Initialized(false),
+        HostDevice(nullptr) {}
 
   /// This class should not be copied.
   AMDGPUPluginTy(const AMDGPUPluginTy &) = delete;
@@ -2256,10 +2258,14 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     hsa_status_t Status = hsa_init();
     if (Status != HSA_STATUS_SUCCESS) {
       // Cannot call hsa_success_string.
-      DP("Failed initialize AMDGPU's HSA library\n");
+      DP("Failed to initialize AMDGPU's HSA library\n");
       return 0;
     }
 
+    // The initialization of HSA was successful. It should be safe to call
+    // HSA functions from now on, e.g., hsa_shut_down.
+    Initialized = true;
+
     // Register event handler to detect memory errors on the devices.
     Status = hsa_amd_register_system_event_handler(eventHandler, nullptr);
     if (auto Err = Plugin::check(
@@ -2319,8 +2325,14 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
 
   /// Deinitialize the plugin.
   Error deinitImpl() override {
-    if (auto Err = HostDevice->deinit())
-      return Err;
+    // The HSA runtime was not initialized, so nothing from the plugin was
+    // actually initialized.
+    if (!Initialized)
+      return Plugin::success();
+
+    if (HostDevice)
+      if (auto Err = HostDevice->deinit())
+        return Err;
 
     // Finalize the HSA runtime.
     hsa_status_t Status = hsa_shut_down();
@@ -2427,6 +2439,11 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     return HSA_STATUS_ERROR;
   }
 
+  /// Indicate whether the HSA runtime was correctly initialized. Even if there
+  /// is no available devices this boolean will be true. It indicates whether
+  /// we can safely call HSA functions (e.g., hsa_shut_down).
+  bool Initialized;
+
   /// Arrays of the available GPU and CPU agents. These arrays of handles should
   /// not be here but in the AMDGPUDeviceTy structures directly. However, the
   /// HSA standard does not provide API functions to retirve agents directly,


        


More information about the Openmp-commits mailing list