[llvm-branch-commits] [openmp] 76d5d54 - Avoid use of stack allocations in asynchronous calls
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 19 22:23:28 PST 2021
Author: Johannes Doerfert
Date: 2021-02-19T22:22:50-08:00
New Revision: 76d5d54f62599d249e0bf2d1b0998451a584c3f3
URL: https://github.com/llvm/llvm-project/commit/76d5d54f62599d249e0bf2d1b0998451a584c3f3
DIFF: https://github.com/llvm/llvm-project/commit/76d5d54f62599d249e0bf2d1b0998451a584c3f3.diff
LOG: Avoid use of stack allocations in asynchronous calls
NOTE: This is an adaptation of the original patch to make it applicable to the
LLVM 12 release branch. The logic is the same, though.
As reported by Guilherme Valarini [0], we used to pass stack allocations
to calls that can nowadays be asynchronous. This is arguably a problem
and it will inevitably result in UB. To remedy the situation we allocate
the locations as part of the AsyncInfoTy object. The lifetime of that
object matches what we need for now. If the synchronization is not tied
to the AsyncInfoTy object anymore we might need to have a different
buffer construct in global space.
This should be back-ported to LLVM 12 but needs slight modifications as
it is based on refactoring patches we do not need to backport.
[0] https://lists.llvm.org/pipermail/openmp-dev/2021-February/003867.html
Differential Revision: https://reviews.llvm.org/D96667
Added:
Modified:
openmp/libomptarget/include/omptarget.h
openmp/libomptarget/src/omptarget.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 9c533944d135..46bb8206efa1 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -14,6 +14,8 @@
#ifndef _OMPTARGET_H_
#define _OMPTARGET_H_
+#include <deque>
+#include <stddef.h>
#include <stdint.h>
#include <stddef.h>
@@ -119,10 +121,18 @@ struct __tgt_target_table {
/// This struct contains information exchanged between different asynchronous
/// operations for device-dependent optimization and potential synchronization
struct __tgt_async_info {
+ /// Locations we used in (potentially) asynchronous calls which should live
+ /// as long as this AsyncInfoTy object.
+ std::deque<void *> BufferLocations;
+
// A pointer to a queue-like structure where offloading operations are issued.
// We assume to use this structure to do synchronization. In CUDA backend, it
// is CUstream.
void *Queue = nullptr;
+
+ /// Return a void* reference with a lifetime that is at least as long as this
+ /// AsyncInfoTy object. The location can be used as intermediate buffer.
+ void *&getVoidPtrLocation();
};
/// This struct is a record of non-contiguous information
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index e4b7b18bc70b..37150aae2fe6 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -18,6 +18,13 @@
#include <cassert>
#include <vector>
+/// Return a void* reference with a lifetime that is at least as long as this
+/// AsyncInfoTy object. The location can be used as intermediate buffer.
+void *&__tgt_async_info::getVoidPtrLocation() {
+ BufferLocations.push_back(nullptr);
+ return BufferLocations.back();
+}
+
/* All begin addresses for partially mapped structs must be 8-aligned in order
* to ensure proper alignment of members. E.g.
*
@@ -415,7 +422,8 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
- void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
+ void *&TgtPtrBase = async_info_ptr->getVoidPtrLocation();
+ TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
sizeof(void *), async_info_ptr);
if (rt != OFFLOAD_SUCCESS) {
@@ -1122,8 +1130,9 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
- void *PointerTgtPtrBegin = Device.getTgtPtrBegin(
- HstPtrVal, ArgSizes[I], IsLast, false, IsHostPtr);
+ void *&PointerTgtPtrBegin = AsyncInfo->getVoidPtrLocation();
+ PointerTgtPtrBegin = Device.getTgtPtrBegin(HstPtrVal, ArgSizes[I],
+ IsLast, false, IsHostPtr);
if (!PointerTgtPtrBegin) {
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
DPxPTR(HstPtrVal));
More information about the llvm-branch-commits
mailing list