[Openmp-commits] [openmp] d18fb09 - [libomptarget][amdgpu] Remove majority of fatal errors

Thu May 20 08:26:57 PDT 2021

Author: Jon Chesterfield
Date: 2021-05-20T16:26:43+01:00
New Revision: d18fb09c693970d1fad09e1ca4b595524af0c842

URL: https://github.com/llvm/llvm-project/commit/d18fb09c693970d1fad09e1ca4b595524af0c842
DIFF: https://github.com/llvm/llvm-project/commit/d18fb09c693970d1fad09e1ca4b595524af0c842.diff

LOG: [libomptarget][amdgpu] Remove majority of fatal errors

[libomptarget][amdgpu] Remove majority of fatal errors

Replaces most calls to exit() with returning an error to the library entry
point. Minor changes to error handling for clear bugs, remove some dead code.

Each exit() call site replaced is either in a library entry point or a
function that already returns error codes on some paths. The existing handling
is not well tested but replacing exit() with a fallback path should be a strict
improvement.

Remaining two early exit points are an abort() from a callback and exit() from
within msgpack. Fixes for those are less obvious and left for a later patch.

Reviewed By: pdhaliwal

Differential Revision: https://reviews.llvm.org/D102346

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/impl/data.cpp
    openmp/libomptarget/plugins/amdgpu/impl/internal.h
    openmp/libomptarget/plugins/amdgpu/impl/system.cpp
    openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
index 345e7647bb29..fbc91c6e688a 100644

--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -20,7 +20,6 @@ using core::TaskImpl;
 extern ATLMachine g_atl_machine;
 
 namespace core {
-void allow_access_to_all_gpu_agents(void *ptr);
 
 namespace {
 ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) {
@@ -39,45 +38,33 @@ hsa_amd_memory_pool_t get_memory_pool_by_mem_place(atmi_mem_place_t place) {
 }
 } // namespace
 
-void register_allocation(void *ptr, size_t size, atmi_mem_place_t place) {
+hsa_status_t register_allocation(void *ptr, size_t size,
+                                 atmi_mem_place_t place) {
   if (place.dev_type == ATMI_DEVTYPE_CPU)
-    allow_access_to_all_gpu_agents(ptr);
+    return allow_access_to_all_gpu_agents(ptr);
+  else
+    return HSA_STATUS_SUCCESS;
 }
 
 atmi_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) {
-  atmi_status_t ret = ATMI_STATUS_SUCCESS;
   hsa_amd_memory_pool_t pool = get_memory_pool_by_mem_place(place);
   hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "atmi_malloc",
-           get_error_string(err));
-    exit(1);
-  }
   DEBUG_PRINT("Malloced [%s %d] %p\n",
               place.dev_type == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", place.dev_id,
               *ptr);
-  if (err != HSA_STATUS_SUCCESS)
-    ret = ATMI_STATUS_ERROR;
 
-  register_allocation(*ptr, size, place);
+  if (err == HSA_STATUS_SUCCESS) {
+    err = register_allocation(*ptr, size, place);
+  }
 
-  return ret;
+  return (err == HSA_STATUS_SUCCESS) ? ATMI_STATUS_SUCCESS : ATMI_STATUS_ERROR;
 }
 
 atmi_status_t Runtime::Memfree(void *ptr) {
-  atmi_status_t ret = ATMI_STATUS_SUCCESS;
-  hsa_status_t err;
-  err = hsa_amd_memory_pool_free(ptr);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "atmi_free",
-           get_error_string(err));
-    exit(1);
-  }
+  hsa_status_t err = hsa_amd_memory_pool_free(ptr);
   DEBUG_PRINT("Freed %p\n", ptr);
 
-  if (err != HSA_STATUS_SUCCESS)
-    ret = ATMI_STATUS_ERROR;
-  return ret;
+  return (err == HSA_STATUS_SUCCESS) ? ATMI_STATUS_SUCCESS : ATMI_STATUS_ERROR;
 }
 
 } // namespace core

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
index 2907de7de6ad..ef0683987349 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -48,12 +48,6 @@ typedef struct atmi_implicit_args_s {
 extern "C" {
 #endif
 
-#define check(msg, status)                                                     \
-  if (status != HSA_STATUS_SUCCESS) {                                          \
-    printf("%s failed.\n", #msg);                                              \
-    exit(1);                                                                   \
-  }
-
 #ifdef DEBUG
 #define DEBUG_PRINT(fmt, ...)                                                  \
   if (core::Runtime::getInstance().getDebugMode()) {                           \
@@ -217,15 +211,14 @@ template <typename T> inline T *alignUp(T *value, size_t alignment) {
       alignDown((intptr_t)(value + alignment - 1), alignment));
 }
 
-extern void register_allocation(void *addr, size_t size,
-                                atmi_mem_place_t place);
+hsa_status_t register_allocation(void *addr, size_t size,
+                                 atmi_mem_place_t place);
 
 extern bool atl_is_atmi_initialized();
 
 bool handle_group_signal(hsa_signal_value_t value, void *arg);
 
-
-void allow_access_to_all_gpu_agents(void *ptr);
+hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
 } // namespace core
 
 const char *get_error_string(hsa_status_t err);

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index 02d04e913346..3998cbddecc2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -166,20 +166,14 @@ static void atl_reset_atmi_initialized() {
 
 bool atl_is_atmi_initialized() { return g_atmi_initialized; }
 
-void allow_access_to_all_gpu_agents(void *ptr) {
-  hsa_status_t err;
+hsa_status_t allow_access_to_all_gpu_agents(void *ptr) {
   std::vector<ATLGPUProcessor> &gpu_procs =
       g_atl_machine.processors<ATLGPUProcessor>();
   std::vector<hsa_agent_t> agents;
   for (uint32_t i = 0; i < gpu_procs.size(); i++) {
     agents.push_back(gpu_procs[i].agent());
   }
-  err = hsa_amd_agents_allow_access(agents.size(), &agents[0], NULL, ptr);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Allow agents ptr access", get_error_string(err));
-    exit(1);
-  }
+  return hsa_amd_agents_allow_access(agents.size(), &agents[0], NULL, ptr);
 }
 
 atmi_status_t Runtime::Initialize() {
@@ -188,10 +182,11 @@ atmi_status_t Runtime::Initialize() {
     return ATMI_STATUS_SUCCESS;
 
   if (devtype == ATMI_DEVTYPE_ALL || devtype & ATMI_DEVTYPE_GPU) {
-    if (atl_init_gpu_context() != ATMI_STATUS_SUCCESS) {
+    atmi_status_t rc = atl_init_gpu_context();
+    if (rc != ATMI_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "GPU context init",
              get_atmi_error_string(atl_init_gpu_context()));
-      exit(1);
+      return rc;
     }
   }
 
@@ -200,8 +195,7 @@ atmi_status_t Runtime::Initialize() {
 }
 
 atmi_status_t Runtime::Finalize() {
-  hsa_status_t err;
-
+  atmi_status_t rc = ATMI_STATUS_SUCCESS;
   for (uint32_t i = 0; i < SymbolInfoTable.size(); i++) {
     SymbolInfoTable[i].clear();
   }
@@ -212,14 +206,14 @@ atmi_status_t Runtime::Finalize() {
   KernelInfoTable.clear();
 
   atl_reset_atmi_initialized();
-  err = hsa_shut_down();
+  hsa_status_t err = hsa_shut_down();
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Shutting down HSA",
            get_error_string(err));
-    exit(1);
+    rc = ATMI_STATUS_ERROR;
   }
 
-  return ATMI_STATUS_SUCCESS;
+  return rc;
 }
 
 static void atmi_init_context_structs() {
@@ -243,7 +237,7 @@ static hsa_status_t get_memory_pool_info(hsa_amd_memory_pool_t memory_pool,
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Alloc allowed in memory pool check", get_error_string(err));
-    exit(1);
+    return err;
   }
   if (alloc_allowed) {
     uint32_t global_flag = 0;
@@ -252,7 +246,7 @@ static hsa_status_t get_memory_pool_info(hsa_amd_memory_pool_t memory_pool,
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Get memory pool info", get_error_string(err));
-      exit(1);
+      return err;
     }
     if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) {
       ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_FINE_GRAINED);
@@ -277,29 +271,27 @@ static hsa_status_t get_agent_info(hsa_agent_t agent, void *data) {
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Get device type info", get_error_string(err));
-    exit(1);
+    return err;
   }
   switch (device_type) {
   case HSA_DEVICE_TYPE_CPU: {
-    ;
     ATLCPUProcessor new_proc(agent);
     err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info,
                                              &new_proc);
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Iterate all memory pools", get_error_string(err));
-      exit(1);
+      return err;
     }
     g_atl_machine.addProcessor(new_proc);
   } break;
   case HSA_DEVICE_TYPE_GPU: {
-    ;
     hsa_profile_t profile;
     err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile);
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Query the agent profile", get_error_string(err));
-      exit(1);
+      return err;
     }
     atmi_devtype_t gpu_type;
     gpu_type =
@@ -310,7 +302,7 @@ static hsa_status_t get_agent_info(hsa_agent_t agent, void *data) {
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Iterate all memory pools", get_error_string(err));
-      exit(1);
+      return err;
     }
     g_atl_machine.addProcessor(new_proc);
   } break;
@@ -368,10 +360,8 @@ static hsa_status_t init_compute_and_memory() {
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Getting a gpu agent",
            get_error_string(err));
-    exit(1);
-  }
-  if (err != HSA_STATUS_SUCCESS)
     return err;
+  }
 
   /* Init all devices or individual device types? */
   std::vector<ATLCPUProcessor> &cpu_procs =
@@ -510,7 +500,7 @@ static hsa_status_t init_compute_and_memory() {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Finding a CPU kernarg memory region handle",
              get_error_string(err));
-      exit(1);
+      return err;
     }
   }
   hsa_region_t atl_gpu_kernarg_region;
@@ -524,7 +514,7 @@ static hsa_status_t init_compute_and_memory() {
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Finding a kernarg memory region", get_error_string(err));
-      exit(1);
+      return err;
     }
   }
   if (num_procs > 0)
@@ -540,7 +530,7 @@ hsa_status_t init_hsa() {
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Initializing the hsa runtime", get_error_string(err));
-      exit(1);
+      return err;
     }
     if (err != HSA_STATUS_SUCCESS)
       return err;
@@ -551,7 +541,7 @@ hsa_status_t init_hsa() {
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "After initializing compute and memory", get_error_string(err));
-      exit(1);
+      return err;
     }
 
     int gpu_count = g_atl_machine.processorCount<ATLGPUProcessor>();
@@ -635,7 +625,7 @@ atmi_status_t atl_init_gpu_context() {
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Registering the system for memory faults", get_error_string(err));
-    exit(1);
+    return ATMI_STATUS_ERROR;
   }
 
   init_tasks();
@@ -1018,7 +1008,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Symbol info extraction", get_error_string(err));
-    exit(1);
+    return err;
   }
   DEBUG_PRINT("Exec Symbol type: %d\n", type);
   if (type == HSA_SYMBOL_KIND_KERNEL) {
@@ -1027,7 +1017,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     char *name = reinterpret_cast<char *>(malloc(name_length + 1));
     err = hsa_executable_symbol_get_info(symbol,
@@ -1035,7 +1025,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     name[name_length] = 0;
 
@@ -1043,12 +1033,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
       // did not find kernel name in the kernel map; this can happen only
       // if the ROCr API for getting symbol info (name) is 
diff erent from
       // the comgr method of getting symbol info
-      if (HSA_STATUS_ERROR_INVALID_CODE_OBJECT != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Invalid kernel name",
-               get_error_string(HSA_STATUS_ERROR_INVALID_CODE_OBJECT));
-        exit(1);
-      }
+      return HSA_STATUS_ERROR;
     }
     atl_kernel_info_t info;
     std::string kernelName = KernelNameMap[std::string(name)];
@@ -1056,12 +1041,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
     // because the non-ROCr custom code object parsing is called before
     // iterating over the code object symbols using ROCr
     if (KernelInfoTable[gpu].find(kernelName) == KernelInfoTable[gpu].end()) {
-      if (HSA_STATUS_ERROR_INVALID_CODE_OBJECT != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Finding the entry kernel info table",
-               get_error_string(HSA_STATUS_ERROR_INVALID_CODE_OBJECT));
-        exit(1);
-      }
+      return HSA_STATUS_ERROR;
     }
     // found, so assign and update
     info = KernelInfoTable[gpu][kernelName];
@@ -1074,7 +1054,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Extracting the symbol from the executable",
              get_error_string(err));
-      exit(1);
+      return err;
     }
     err = hsa_executable_symbol_get_info(
         symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
@@ -1083,7 +1063,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Extracting the group segment size from the executable",
              get_error_string(err));
-      exit(1);
+      return err;
     }
     err = hsa_executable_symbol_get_info(
         symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
@@ -1092,7 +1072,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Extracting the private segment from the executable",
              get_error_string(err));
-      exit(1);
+      return err;
     }
 
     DEBUG_PRINT(
@@ -1110,7 +1090,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     char *name = reinterpret_cast<char *>(malloc(name_length + 1));
     err = hsa_executable_symbol_get_info(symbol,
@@ -1118,7 +1098,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
     name[name_length] = 0;
 
@@ -1129,7 +1109,7 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info address extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
 
     err = hsa_executable_symbol_get_info(
@@ -1137,14 +1117,17 @@ static hsa_status_t populate_InfoTables(hsa_executable_t executable,
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Symbol info size extraction", get_error_string(err));
-      exit(1);
+      return err;
     }
 
     atmi_mem_place_t place = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu, 0);
     DEBUG_PRINT("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr,
                 info.size);
-    register_allocation(reinterpret_cast<void *>(info.addr), (size_t)info.size,
-                        place);
+    err = register_allocation(reinterpret_cast<void *>(info.addr),
+                              (size_t)info.size, place);
+    if (err != HSA_STATUS_SUCCESS) {
+      return err;
+    }
     SymbolInfoTable[gpu][std::string(name)] = info;
     if (strcmp(name, "needs_hostcall_buffer") == 0)
       g_atmi_hostcall_required = true;
@@ -1174,7 +1157,7 @@ atmi_status_t Runtime::RegisterModuleFromMemory(
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Query the agent profile", get_error_string(err));
-    exit(1);
+    return ATMI_STATUS_ERROR;
   }
   // FIXME: Assume that every profile is FULL until we understand how to build
   // GCN with base profile
@@ -1185,7 +1168,7 @@ atmi_status_t Runtime::RegisterModuleFromMemory(
   if (err != HSA_STATUS_SUCCESS) {
     printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
            "Create the executable", get_error_string(err));
-    exit(1);
+    return ATMI_STATUS_ERROR;
   }
 
   bool module_load_success = false;
@@ -1223,7 +1206,7 @@ atmi_status_t Runtime::RegisterModuleFromMemory(
         printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
                "Error in deserialized_data callback",
                get_atmi_error_string(atmi_err));
-        exit(1);
+        return atmi_err;
       }
 
       /* Load the code object.  */
@@ -1246,7 +1229,7 @@ atmi_status_t Runtime::RegisterModuleFromMemory(
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Freeze the executable", get_error_string(err));
-      exit(1);
+      return ATMI_STATUS_ERROR;
     }
 
     err = hsa_executable_iterate_symbols(executable, populate_InfoTables,
@@ -1254,7 +1237,7 @@ atmi_status_t Runtime::RegisterModuleFromMemory(
     if (err != HSA_STATUS_SUCCESS) {
       printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
              "Iterating over symbols for execuatable", get_error_string(err));
-      exit(1);
+      return ATMI_STATUS_ERROR;
     }
 
     // save the executable and destroy during finalize

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp b/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
index 2aa09ff47e40..3c62f35f5b87 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
@@ -99,7 +99,6 @@ void Environment::GetEnvAll() {
     std::cout << "ATMI_MAX_HSA_QUEUE_SIZE : positive integer" << std::endl
               << "ATMI_DEBUG : 1 for printing out trace/debug info"
               << std::endl;
-    exit(0);
   }
 
   var = GetEnv("ATMI_MAX_HSA_QUEUE_SIZE");

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 944d9d6dd59c..de76b80be931 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -72,7 +72,6 @@ hostrpc_assign_buffer(hsa_agent_t, hsa_queue_t *, uint32_t device_id) {
 
 int print_kernel_trace;
 
-#undef check // Drop definition from internal.h
 #ifdef OMPTARGET_DEBUG
 #define check(msg, status)                                                     \
   if (status != ATMI_STATUS_SUCCESS) {                                         \
@@ -116,11 +115,8 @@ struct KernelArgPool {
   ~KernelArgPool() {
     if (kernarg_region) {
       auto r = hsa_amd_memory_pool_free(kernarg_region);
-      assert(r == HSA_STATUS_SUCCESS);
       if (r != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Memory pool free", get_error_string(r));
-        exit(1);
+        DP("hsa_amd_memory_pool_free failed: %s\n", get_error_string(r));
       }
     }
   }
@@ -135,22 +131,33 @@ struct KernelArgPool {
 
     // atmi uses one pool per kernel for all gpus, with a fixed upper size
     // preserving that exact scheme here, including the queue<int>
-    {
-      hsa_status_t err = hsa_amd_memory_pool_allocate(
-          atl_gpu_kernarg_pools[0],
-          kernarg_size_including_implicit() * MAX_NUM_KERNELS, 0,
-          &kernarg_region);
-      if (err != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Allocating memory for the executable-kernel",
-               get_error_string(err));
-        exit(1);
-      }
-      core::allow_access_to_all_gpu_agents(kernarg_region);
 
-      for (int i = 0; i < MAX_NUM_KERNELS; i++) {
-        free_kernarg_segments.push(i);
+    hsa_status_t err = hsa_amd_memory_pool_allocate(
+        atl_gpu_kernarg_pools[0],
+        kernarg_size_including_implicit() * MAX_NUM_KERNELS, 0,
+        &kernarg_region);
+
+    if (err != HSA_STATUS_SUCCESS) {
+      DP("hsa_amd_memory_pool_allocate failed: %s\n", get_error_string(err));
+      kernarg_region = nullptr; // paranoid
+      return;
+    }
+
+    err = core::allow_access_to_all_gpu_agents(kernarg_region);
+    if (err != HSA_STATUS_SUCCESS) {
+      DP("hsa allow_access_to_all_gpu_agents failed: %s\n",
+         get_error_string(err));
+      auto r = hsa_amd_memory_pool_free(kernarg_region);
+      if (r != HSA_STATUS_SUCCESS) {
+        // if free failed, can't do anything more to resolve it
+        DP("hsa memory poll free failed: %s\n", get_error_string(err));
       }
+      kernarg_region = nullptr;
+      return;
+    }
+
+    for (int i = 0; i < MAX_NUM_KERNELS; i++) {
+      free_kernarg_segments.push(i);
     }
   }
 
@@ -474,17 +481,18 @@ class RTLDeviceInfoTy {
     NumThreads.resize(NumberOfDevices);
     deviceStateStore.resize(NumberOfDevices);
 
+    for (int i = 0; i < NumberOfDevices; i++) {
+      HSAQueues[i] = nullptr;
+    }
+
     for (int i = 0; i < NumberOfDevices; i++) {
       uint32_t queue_size = 0;
       {
-        hsa_status_t err;
-        err = hsa_agent_get_info(HSAAgents[i], HSA_AGENT_INFO_QUEUE_MAX_SIZE,
-                                 &queue_size);
+        hsa_status_t err = hsa_agent_get_info(
+            HSAAgents[i], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
         if (err != HSA_STATUS_SUCCESS) {
-          printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-                 "Querying the agent maximum queue size",
-                 get_error_string(err));
-          exit(1);
+          DP("HSA query QUEUE_MAX_SIZE failed for agent %d\n", i);
+          return;
         }
         if (queue_size > core::Runtime::getInstance().getMaxQueueSize()) {
           queue_size = core::Runtime::getInstance().getMaxQueueSize();
@@ -495,7 +503,7 @@ class RTLDeviceInfoTy {
           HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI, callbackQueue, NULL,
           UINT32_MAX, UINT32_MAX, &HSAQueues[i]);
       if (rc != HSA_STATUS_SUCCESS) {
-        DP("Failed to create HSA queues\n");
+        DP("Failed to create HSA queue %d\n", i);
         return;
       }
 
@@ -540,18 +548,6 @@ class RTLDeviceInfoTy {
     RequiresFlags = OMP_REQ_UNDEFINED;
   }
 
-  void DestroyHSAExecutables() {
-    hsa_status_t Err;
-    for (uint32_t I = 0; I < HSAExecutables.size(); I++) {
-      Err = hsa_executable_destroy(HSAExecutables[I]);
-      if (Err != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Destroying executable", get_error_string(Err));
-        return;
-      }
-    }
-  }
-
   ~RTLDeviceInfoTy() {
     DP("Finalizing the HSA-ATMI DeviceInfo.\n");
     // Run destructors on types that use HSA before
@@ -561,7 +557,14 @@ class RTLDeviceInfoTy {
     // Terminate hostrpc before finalizing ATMI
     hostrpc_terminate();
 
-    DestroyHSAExecutables();
+    for (uint32_t I = 0; I < HSAExecutables.size(); I++) {
+      hsa_status_t Err = hsa_executable_destroy(HSAExecutables[I]);
+      if (Err != HSA_STATUS_SUCCESS) {
+        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
+           "Destroying executable", get_error_string(Err));
+      }
+    }
+
     atmi_finalize();
   }
 };
@@ -1010,9 +1013,8 @@ static uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
 
     if (rc == 0) {
       if (size_si.size != sizeof(uint64_t)) {
-        fprintf(stderr,
-                "Found device_State_size variable with wrong size, aborting\n");
-        exit(1);
+        DP("Found device_State_size variable with wrong size\n");
+        return 0;
       }
 
       // Read number of bytes directly from the elf
@@ -1828,6 +1830,9 @@ int32_t __tgt_rtl_run_target_team_region_locked(
   // Run on the device.
   {
     hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id];
+    if (!queue) {
+      return OFFLOAD_FAIL;
+    }
     uint64_t packet_id = acquire_available_packet_id(queue);
 
     const uint32_t mask = queue->size - 1; // size is a power of 2
@@ -1867,8 +1872,8 @@ int32_t __tgt_rtl_run_target_team_region_locked(
       }
     }
     if (!ArgPool) {
-      fprintf(stderr, "Warning: No ArgPool for %s on device %d\n",
-              KernelInfo->Name, device_id);
+      DP("Warning: No ArgPool for %s on device %d\n", KernelInfo->Name,
+         device_id);
     }
     {
       void *kernarg = nullptr;
@@ -1877,8 +1882,8 @@ int32_t __tgt_rtl_run_target_team_region_locked(
         kernarg = ArgPool->allocate(arg_num);
       }
       if (!kernarg) {
-        printf("Allocate kernarg failed\n");
-        exit(1);
+        DP("Allocate kernarg failed\n");
+        return OFFLOAD_FAIL;
       }
 
       // Copy explicit arguments
@@ -1919,8 +1924,8 @@ int32_t __tgt_rtl_run_target_team_region_locked(
     {
       hsa_signal_t s = DeviceInfo.FreeSignalPool.pop();
       if (s.handle == 0) {
-        printf("Failed to get signal instance\n");
-        exit(1);
+        DP("Failed to get signal instance\n");
+        return OFFLOAD_FAIL;
       }
       packet->completion_signal = s;
       hsa_signal_store_relaxed(packet->completion_signal, 1);