[llvm-branch-commits] [openmp] f628eef - [libomptarget][amdgpu] Fix latent race in load binary
Jon Chesterfield via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 4 08:34:16 PST 2020
Author: Jon Chesterfield
Date: 2020-12-04T16:29:09Z
New Revision: f628eef98acd24f8eb6a52d67ee887bb18f04bca
URL: https://github.com/llvm/llvm-project/commit/f628eef98acd24f8eb6a52d67ee887bb18f04bca
DIFF: https://github.com/llvm/llvm-project/commit/f628eef98acd24f8eb6a52d67ee887bb18f04bca.diff
LOG: [libomptarget][amdgpu] Fix latent race in load binary
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index f22b4697f30b..ea8770e4543a 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -925,6 +925,26 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
return res;
}
+static atmi_status_t atmi_calloc(void **ret_ptr, size_t size,
+ atmi_mem_place_t place) {
+ uint64_t rounded = 4 * ((size + 3) / 4);
+ void *ptr;
+ atmi_status_t err = atmi_malloc(&ptr, rounded, place);
+ if (err != ATMI_STATUS_SUCCESS) {
+ return err;
+ }
+
+ hsa_status_t rc = hsa_amd_memory_fill(ptr, 0, rounded / 4);
+ if (rc != HSA_STATUS_SUCCESS) {
+ fprintf(stderr, "zero fill device_state failed with %u\n", rc);
+ atmi_free(ptr);
+ return ATMI_STATUS_ERROR;
+ }
+
+ *ret_ptr = ptr;
+ return ATMI_STATUS_SUCCESS;
+}
+
__tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
__tgt_device_image *image) {
// This function loads the device image onto gpu[device_id] and does other
@@ -1024,7 +1044,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
assert(dss.second == 0);
void *ptr = NULL;
atmi_status_t err =
- atmi_malloc(&ptr, device_State_bytes, get_gpu_mem_place(device_id));
+ atmi_calloc(&ptr, device_State_bytes, get_gpu_mem_place(device_id));
if (err != ATMI_STATUS_SUCCESS) {
fprintf(stderr, "Failed to allocate device_state array\n");
return NULL;
@@ -1062,13 +1082,6 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
fprintf(stderr, "memcpy install of state_ptr failed\n");
return NULL;
}
-
- assert((device_State_bytes & 0x3) == 0); // known >= 4 byte aligned
- hsa_status_t rc = hsa_amd_memory_fill(ptr, 0, device_State_bytes / 4);
- if (rc != HSA_STATUS_SUCCESS) {
- fprintf(stderr, "zero fill device_state failed with %u\n", rc);
- return NULL;
- }
}
// TODO: Check with Guansong to understand the below comment more thoroughly.
More information about the llvm-branch-commits
mailing list