[Openmp-commits] [openmp] [libomptarget][OpenMP] Initial implementation of omp_target_memset() and omp_target_memset_async() (PR #68706)
Michael Klemm via Openmp-commits
openmp-commits at lists.llvm.org
Tue Oct 10 10:25:38 PDT 2023
================
@@ -241,10 +241,125 @@ static int libomp_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
return Rc;
}
+/// Task entry point used by omp_target_memset_async(): calls
+/// omp_target_memset() with the arguments stored in the task's shareds
+/// pointer and then deletes that argument block.
+///
+/// \returns OFFLOAD_FAIL if \p Task or its argument block is null,
+///          OFFLOAD_SUCCESS otherwise.
+static int libomp_target_memset_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
+  // Nothing can be done without a task descriptor.
+  if (Task == nullptr)
+    return OFFLOAD_FAIL;
+
+  // The argument block travels in the shareds slot of the task.
+  auto *MemsetArgs = reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds);
+  if (MemsetArgs == nullptr)
+    return OFFLOAD_FAIL;
+
+  // Perform the fill; the pointer it returns is not needed here.
+  omp_target_memset(MemsetArgs->Ptr, MemsetArgs->C, MemsetArgs->N,
+                    MemsetArgs->DeviceNum);
+
+  // The argument block is released once the fill call has returned.
+  delete MemsetArgs;
+  return OFFLOAD_SUCCESS;
+}
+
+/// Appends to \p Vec a copy of the kmp_depend_info_t record referenced by
+/// each of the first \p DepObjCount entries of \p DepObjList. Each entry is
+/// cast to a kmp_depend_info_t pointer and the pointee is copied.
+static inline void
+ConvertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount,
+                    omp_depend_t *DepObjList) {
+  int Idx = 0;
+  while (Idx < DepObjCount) {
+    auto *Info = (kmp_depend_info_t *)DepObjList[Idx];
+    Vec.push_back(*Info);
+    ++Idx;
+  }
+}
+
+/// Allocates a task flagged as hidden helper whose entry point is
+/// libomp_target_memset_async_helper(), attaches \p Args to it and launches
+/// it with the given depend objects.
+///
+/// \p Args is deleted here if the task allocation fails; otherwise it is
+/// stored in the task's shareds pointer.
+///
+/// \returns OFFLOAD_FAIL if the task could not be allocated, otherwise the
+///          value returned by __kmpc_omp_task_with_deps().
+static int libomp_helper_memset_task_creation(TargetMemsetArgsTy *Args,
+                                              int DepObjCount,
+                                              omp_depend_t *DepObjList) {
+  // Global thread ID of the calling thread
+  const int Gtid = __kmpc_global_thread_num(nullptr);
+
+  // Entry point the task will execute
+  int (*TaskEntry)(kmp_int32, kmp_task_t *) =
+      &libomp_target_memset_async_helper;
+
+  // Task flags with only the hidden_helper bit set
+  kmp_int32 Flags = 0;
+  reinterpret_cast<kmp_tasking_flags_t *>(&Flags)->hidden_helper = 1;
+
+  // Allocate the helper task; give up (and release Args) on failure
+  kmp_task_t *HelperTask = __kmpc_omp_target_task_alloc(
+      nullptr, Gtid, Flags, sizeof(kmp_task_t), 0, TaskEntry, -1);
+  if (HelperTask == nullptr) {
+    delete Args;
+    return OFFLOAD_FAIL;
+  }
+
+  // Hand the argument block over to the task
+  HelperTask->shareds = Args;
+
+  // Translate the depend objects into the runtime's dependence records
+  llvm::SmallVector<kmp_depend_info_t> DepInfos;
+  ConvertDepObjVector(DepInfos, DepObjCount, DepObjList);
+
+  // Launch the helper task and report the runtime's result
+  return __kmpc_omp_task_with_deps(nullptr, Gtid, HelperTask, DepObjCount,
+                                   DepInfos.data(), 0, nullptr);
+}
+
+/// Fills the first \p N bytes at \p Ptr on device \p DeviceNum with the byte
+/// value \p C.
+///
+/// If \p DeviceNum is the initial device the fill is a plain memset().
+/// Otherwise a staging buffer is allocated on the initial device, filled
+/// there, and copied to the target with omp_target_memcpy().
+///
+/// \returns \p Ptr in all cases. The function has no way to report failure;
+///          if the staging buffer cannot be allocated the target memory is
+///          left untouched and a debug message is emitted.
+EXTERN void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum) {
+  TIMESCOPE();
+  DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
+     DeviceNum, Ptr, N);
+
+  // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
+  // of unspecified behavior, see OpenMP spec).
+  if (!Ptr || N == 0) {
+    return Ptr;
+  }
+
+  // Query the initial device once; it is needed on both paths.
+  int InitialDevice = omp_get_initial_device();
+
+  if (DeviceNum == InitialDevice) {
+    DP("filling memory on host via memset\n");
+    memset(Ptr, C, N); // ignore return value, memset() cannot fail
+  } else {
+    // TODO: replace the omp_target_memset() slow path with the fast path.
+    // That will require the ability to execute a kernel from within
+    // libomptarget.so (which we do not have at the moment).
+
+    // This is a very slow path: create a filled array on the host and upload
+    // it to the GPU device.
+    void *Shadow = omp_target_alloc(N, InitialDevice);
+    if (Shadow) {
+      (void)memset(Shadow, C, N);
+      (void)omp_target_memcpy(Ptr, Shadow, N, 0, 0, DeviceNum, InitialDevice);
+      (void)omp_target_free(Shadow, InitialDevice);
+    } else {
+      // The staging allocation failed. Writing through a null pointer would
+      // be undefined behavior, and there is no error channel, so skip the
+      // fill instead of crashing the process.
+      DP("omp_target_memset failed to fill memory due to error with "
+         "omp_target_alloc\n");
+    }
+  }
+
+  DP("omp_target_memset returns %p\n", Ptr);
+  return Ptr;
+}
+
+/// Asynchronous variant of omp_target_memset(): packages the request into a
+/// heap-allocated TargetMemsetArgsTy and hands it to
+/// libomp_helper_memset_task_creation() together with the depend objects.
+///
+/// \returns \p Ptr in all cases; the result of the task creation is ignored
+///          because this routine cannot fail via a return code.
+EXTERN void *omp_target_memset_async(void *Ptr, int C, size_t N, int DeviceNum,
+                                     int DepObjCount,
+                                     omp_depend_t *DepObjList) {
+  // Terminate the message with a newline, like the matching message in
+  // omp_target_memset(), so it does not run into the next debug line.
+  DP("Call to omp_target_memset_async, device %d, device pointer %p, size "
+     "%zu\n",
+     DeviceNum, Ptr, N);
+
+  // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
+  // of unspecified behavior, see OpenMP spec).
+  if (!Ptr || N == 0) {
+    return Ptr;
+  }
+
+  // Create the task object to deal with the async invocation
+  auto *Args = new TargetMemsetArgsTy{Ptr, C, N, DeviceNum};
+
+  // omp_target_memset_async() cannot fail via a return code, so ignore the
+  // return code of the helper function
+  (void)libomp_helper_memset_task_creation(Args, DepObjCount, DepObjList);
+
+  return Ptr;
+}
+
// Allocate and launch helper task
-static int libomp_helper_task_creation(TargetMemcpyArgsTy *Args,
- int DepObjCount,
- omp_depend_t *DepObjList) {
+static int libomp_helper_memcpy_task_creation(TargetMemcpyArgsTy *Args,
----------------
mjklemm wrote:
How about the last patch set? The duplication of the helper function, but I have retained the actual task function. There's still a tiny bit of code duplication, but I felt that the reduction using a callback is not saving much, at the expense of making the actual control flow even harder to understand. Let me know if you'd agree to that.
https://github.com/llvm/llvm-project/pull/68706
More information about the Openmp-commits
mailing list