[Mlir-commits] [llvm] [mlir] [OMPIRBuilder] - Make offloading input data persist for deferred target tasks (PR #133499)

Sergio Afonso llvmlistbot at llvm.org
Thu May 8 05:50:53 PDT 2025


================
@@ -7093,49 +7112,110 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
   ProxyFn->getArg(0)->setName("thread.id");
   ProxyFn->getArg(1)->setName("task");
 
+  bool HasShareds = SharedArgsOperandNo > 0;
+  bool HasOffloadingArrays = NumOffloadingArrays > 0;
   BasicBlock *EntryBB =
       BasicBlock::Create(Builder.getContext(), "entry", ProxyFn);
   Builder.SetInsertPoint(EntryBB);
 
-  bool HasShareds = StaleCI->arg_size() > 1;
-  // TODO: This is a temporary assert to prove to ourselves that
-  // the outlined target launch function is always going to have
-  // atmost two arguments if there is any data shared between
-  // host and device.
-  assert((!HasShareds || (StaleCI->arg_size() == 2)) &&
-         "StaleCI with shareds should have exactly two arguments.");
-
   Value *ThreadId = ProxyFn->getArg(0);
+  Value *TaskWithPrivates = ProxyFn->getArg(1);
+
+  SmallVector<Value *> KernelLaunchArgs;
+  KernelLaunchArgs.reserve(StaleCI->arg_size());
+  KernelLaunchArgs.push_back(ThreadId);
+
+  if (HasOffloadingArrays) {
+    assert(TaskTy != TaskWithPrivatesTy &&
+           "If there are offloading arrays to pass to the target"
+           "TaskTy cannot be the same as TaskWithPrivatesTy");
+    Value *Privates =
+        Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
+    for (unsigned int i = 0; i < NumOffloadingArrays; ++i)
+      KernelLaunchArgs.push_back(
+          Builder.CreateStructGEP(PrivatesTy, Privates, i));
+  }
+
   if (HasShareds) {
-    auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+    auto *ArgStructAlloca =
+        dyn_cast<AllocaInst>(StaleCI->getArgOperand(SharedArgsOperandNo));
     assert(ArgStructAlloca &&
            "Unable to find the alloca instruction corresponding to arguments "
            "for extracted function");
     auto *ArgStructType = cast<StructType>(ArgStructAlloca->getAllocatedType());
 
     AllocaInst *NewArgStructAlloca =
         Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
-    Value *TaskT = ProxyFn->getArg(1);
+
     Value *SharedsSize =
         Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
 
-    Value *Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
+    Value *TaskT =
+        Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
+    Value *Shareds = TaskT;
+    // TaskWithPrivatesTy can be
+    // %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
+    // %struct.privates }
+    // OR
+    //  %struct.kmp_task_ompbuilder_t  ;; This is simply TaskTy
+    // In the former case, that is when  TaskWithPrivatesTy is not the same as
+    // TaskTy, then its first member has to be the task descriptor. TaskTy is
+    // the type of the task descriptor. TaskT is the pointer to the task
+    // descriptor. Loading the first member of TaskT, gives us the pointer to
+    // shared data.
+    if (TaskWithPrivatesTy != TaskTy)
+      Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
     LoadInst *LoadShared =
         Builder.CreateLoad(PointerType::getUnqual(Ctx), Shareds);
 
     Builder.CreateMemCpy(
         NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
         LoadShared->getPointerAlignment(M.getDataLayout()), SharedsSize);
-
-    Builder.CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
-  } else {
-    Builder.CreateCall(KernelLaunchFunction, {ThreadId});
+    KernelLaunchArgs.push_back(NewArgStructAlloca);
   }
-
+  Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
   Builder.CreateRetVoid();
   return ProxyFn;
 }
 
+// This function returns a struct that has at most two members.
+// The first member is always %struct.kmp_task_ompbuilder_t, that is the task
+// descriptor. The second member, if needed, is a struct containing arrays
+// that need to be passed to the offloaded target kernel. For example,
+// if .offload_baseptrs, .offload_ptrs and .offload_sizes have to be passed to
+// the target kernel and their types are [3 x ptr], [3 x ptr] and [3 x i64]
+// respectively, then the types created  by this function are
+//
+// %struct.privates = type { [3 x ptr], [3 x ptr], [3 x i64] }
+// %struct.task_with_privates = type { %struct.kmp_task_ompbuilder_t,
+//                                     %struct.privates }
+// %struct.task_with_privates is returned by this function.
+// If there aren't any offloading arrays to pass to the target kernel,
+// %struct.kmp_task_ompbuilder_t is returned.
+static StructType *
+createTaskWithPrivatesTy(Type *Task,
+                         ArrayRef<Value *> OffloadingArraysToPrivatize) {
+
+  if (OffloadingArraysToPrivatize.empty())
+    return static_cast<StructType *>(Task);
+
+  SmallVector<Type *, 4> StructFieldTypes;
+  for (auto &V : OffloadingArraysToPrivatize) {
----------------
skatrak wrote:

```suggestion
  for (Value *V : OffloadingArraysToPrivatize) {
```

https://github.com/llvm/llvm-project/pull/133499


More information about the Mlir-commits mailing list