[clang] [llvm] [mlir] [OMPIRBuilder] - Handle dependencies in `createTarget` (PR #93977)
Michael Kruse via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 7 11:05:41 PDT 2024
================
@@ -5229,13 +5362,288 @@ static void emitTargetOutlinedFunction(
OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, true,
OutlinedFn, OutlinedFnID);
}
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
+ Function *OutlinedFn, Value *OutlinedFnID,
+ EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
+ Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP,
+ SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
+ bool HasNoWait) {
+
+ // When we arrive at this function, the target region itself has been
+ // outlined into the function OutlinedFn.
+ // So at ths point, for
+ // --------------------------------------------------
+ // void user_code_that_offloads(...) {
+ // omp target depend(..) map(from:a) map(to:b, c)
+ // a = b + c
+ // }
+ //
+ // --------------------------------------------------
+ //
+ // we have
+ //
+ // --------------------------------------------------
+ //
+ // void user_code_that_offloads(...) {
+ // %.offload_baseptrs = alloca [3 x ptr], align 8
+ // %.offload_ptrs = alloca [3 x ptr], align 8
+ // %.offload_mappers = alloca [3 x ptr], align 8
+ // ;; target region has been outlined and now we need to
+ // ;; offload to it via a target task.
+ // }
+ // void outlined_device_function(ptr a, ptr b, ptr c) {
+ // *a = *b + *c
+ // }
+ //
+ // We have to now do the following
+ // (i) Make an offloading call to outlined_device_function using the OpenMP
+ // RTL. See 'kernel_launch_function' in the pseudo code below. This is
+ // emitted by emitKernelLaunch
+ // (ii) Create a task entry point function that calls kernel_launch_function
+ // and is the entry point for the target task. See
+ // '@.omp_target_task_proxy_func in the pseudocode below.
+ // (iii) Create a task with the task entry point created in (ii)
+ //
+ // That is we create the following
+ //
+ // void user_code_that_offloads(...) {
+ // %.offload_baseptrs = alloca [3 x ptr], align 8
+ // %.offload_ptrs = alloca [3 x ptr], align 8
+ // %.offload_mappers = alloca [3 x ptr], align 8
+ //
+ // %structArg = alloca { ptr, ptr, ptr }, align 8
+ // %strucArg[0] = %.offload_baseptrs
+ // %strucArg[1] = %.offload_ptrs
+ // %strucArg[2] = %.offload_mappers
+ // proxy_target_task = @__kmpc_omp_task_alloc(...,
+ // @.omp_target_task_proxy_func)
+ // memcpy(proxy_target_task->shareds, %structArg, sizeof(structArg))
+ // dependencies_array = ...
+ // ;; if nowait not present
+ // call @__kmpc_omp_wait_deps(..., dependencies_array)
+ // call @__kmpc_omp_task_begin_if0(...)
+ // call @ @.omp_target_task_proxy_func(i32 thread_id, ptr
+ // %proxy_target_task) call @__kmpc_omp_task_complete_if0(...)
+ // }
+ //
+ // define internal void @.omp_target_task_proxy_func(i32 %thread.id,
+ // ptr %task) {
+ // %structArg = alloca {ptr, ptr, ptr}
+ // %shared_data = load (getelementptr %task, 0, 0)
+ // mempcy(%structArg, %shared_data, sizeof(structArg))
+ // kernel_launch_function(%thread.id, %structArg)
+ // }
+ //
+ // We need the proxy function because the signature of the task entry point
+ // expected by kmpc_omp_task is always the same and will be different from
+ // that of the kernel_launch function.
+ //
+ // kernel_launch_function is generated by emitKernelLaunch and has the
+ // always_inline attribute. void kernel_launch_function(thread_id,
----------------
Meinersbur wrote:
[nit] line break
https://github.com/llvm/llvm-project/pull/93977
More information about the cfe-commits
mailing list