[Mlir-commits] [llvm] [mlir] [OMPIRBuilder] - Make offloading input data persist for deferred target tasks (PR #133499)
Sergio Afonso
llvmlistbot at llvm.org
Thu May 8 05:50:53 PDT 2025
================
@@ -7093,49 +7112,110 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
ProxyFn->getArg(0)->setName("thread.id");
ProxyFn->getArg(1)->setName("task");
+ bool HasShareds = SharedArgsOperandNo > 0;
+ bool HasOffloadingArrays = NumOffloadingArrays > 0;
BasicBlock *EntryBB =
BasicBlock::Create(Builder.getContext(), "entry", ProxyFn);
Builder.SetInsertPoint(EntryBB);
- bool HasShareds = StaleCI->arg_size() > 1;
- // TODO: This is a temporary assert to prove to ourselves that
- // the outlined target launch function is always going to have
- // atmost two arguments if there is any data shared between
- // host and device.
- assert((!HasShareds || (StaleCI->arg_size() == 2)) &&
- "StaleCI with shareds should have exactly two arguments.");
-
Value *ThreadId = ProxyFn->getArg(0);
+ Value *TaskWithPrivates = ProxyFn->getArg(1);
+
+ SmallVector<Value *> KernelLaunchArgs;
+ KernelLaunchArgs.reserve(StaleCI->arg_size());
+ KernelLaunchArgs.push_back(ThreadId);
+
+ if (HasOffloadingArrays) {
+ assert(TaskTy != TaskWithPrivatesTy &&
+ "If there are offloading arrays to pass to the target"
+ "TaskTy cannot be the same as TaskWithPrivatesTy");
+ Value *Privates =
+ Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
+ for (unsigned int i = 0; i < NumOffloadingArrays; ++i)
+ KernelLaunchArgs.push_back(
+ Builder.CreateStructGEP(PrivatesTy, Privates, i));
+ }
+
if (HasShareds) {
- auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+ auto *ArgStructAlloca =
+ dyn_cast<AllocaInst>(StaleCI->getArgOperand(SharedArgsOperandNo));
assert(ArgStructAlloca &&
"Unable to find the alloca instruction corresponding to arguments "
"for extracted function");
auto *ArgStructType = cast<StructType>(ArgStructAlloca->getAllocatedType());
AllocaInst *NewArgStructAlloca =
Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
- Value *TaskT = ProxyFn->getArg(1);
+
Value *SharedsSize =
Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
- Value *Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
+ Value *TaskT =
+ Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
+ Value *Shareds = TaskT;
+ // TaskWithPrivatesTy can be
+ // %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
+ // %struct.privates }
+ // OR
+ // %struct.kmp_task_ompbuilder_t ;; This is simply TaskTy
+ // In the former case, that is when TaskWithPrivatesTy is not the same as
+ // TaskTy, then its first member has to be the task descriptor. TaskTy is
+ // the type of the task descriptor. TaskT is the pointer to the task
+ // descriptor. Loading the first member of TaskT, gives us the pointer to
+ // shared data.
+ if (TaskWithPrivatesTy != TaskTy)
+ Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
LoadInst *LoadShared =
Builder.CreateLoad(PointerType::getUnqual(Ctx), Shareds);
Builder.CreateMemCpy(
NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
LoadShared->getPointerAlignment(M.getDataLayout()), SharedsSize);
-
- Builder.CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
- } else {
- Builder.CreateCall(KernelLaunchFunction, {ThreadId});
+ KernelLaunchArgs.push_back(NewArgStructAlloca);
}
-
+ Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
Builder.CreateRetVoid();
return ProxyFn;
}
+// This function returns a struct that has at most two members.
+// The first member is always %struct.kmp_task_ompbuilder_t, that is the task
+// descriptor. The second member, if needed, is a struct containing arrays
+// that need to be passed to the offloaded target kernel. For example,
+// if .offload_baseptrs, .offload_ptrs and .offload_sizes have to be passed to
+// the target kernel and their types are [3 x ptr], [3 x ptr] and [3 x i64]
+// respectively, then the types created by this function are
+//
+// %struct.privates = type { [3 x ptr], [3 x ptr], [3 x i64] }
+// %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
+// %struct.privates }
+// %struct.task_with_privates is returned by this function.
+// If there aren't any offloading arrays to pass to the target kernel,
+// %struct.kmp_task_ompbuilder_t is returned.
+static StructType *
+createTaskWithPrivatesTy(Type *Task,
+ ArrayRef<Value *> OffloadingArraysToPrivatize) {
+
+ if (OffloadingArraysToPrivatize.empty())
+ return static_cast<StructType *>(Task);
+
+ SmallVector<Type *, 4> StructFieldTypes;
+ for (auto &V : OffloadingArraysToPrivatize) {
----------------
skatrak wrote:
```suggestion
for (Value *V : OffloadingArraysToPrivatize) {
```
https://github.com/llvm/llvm-project/pull/133499
More information about the Mlir-commits mailing list