[clang] [llvm] [mlir] [OMPIRBuilder] - Handle dependencies in `createTarget` (PR #93977)

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 7 11:05:41 PDT 2024


================
@@ -5212,6 +5273,78 @@ static Function *createOutlinedFunction(
   return Func;
 }
 
+// Create an entry point for a target task with the following.
+// It'll have the following signature
+// void @.omp_target_task_proxy_func(i32 %thread.id, ptr %task)
+// This function is called from emitTargetTask once the
+// code to launch the target kernel has been outlined already.
+static Function *emitProxyTaskFunction(OpenMPIRBuilder &OMPBuilder,
+                                       IRBuilderBase &Builder,
+                                       CallInst *StaleCI) {
+  Module &M = OMPBuilder.M;
+  // CalledFunction is the target launch function, i.e.
+  // the function that sets up kernel arguments and calls
+  // __tgt_target_kernel to launch the kernel on the device.
+  Function *CalledFunction = StaleCI->getCalledFunction();
+  OpenMPIRBuilder::InsertPointTy IP(StaleCI->getParent(),
+                                    StaleCI->getIterator());
+  LLVMContext &Ctx = StaleCI->getParent()->getContext();
+  Type *ThreadIDTy = Type::getInt32Ty(Ctx);
+  Type *TaskPtrTy = OMPBuilder.TaskPtr;
+  Type *TaskTy = OMPBuilder.Task;
+  auto ProxyFnTy =
+      FunctionType::get(Builder.getVoidTy(), {ThreadIDTy, TaskPtrTy},
+                        /* isVarArg */ false);
+  auto ProxyFn = Function::Create(ProxyFnTy, GlobalValue::InternalLinkage,
+                                  ".omp_target_task_proxy_func",
+                                  Builder.GetInsertBlock()->getModule());
+
+  BasicBlock *EntryBB =
+      BasicBlock::Create(Builder.getContext(), "entry", ProxyFn);
+  Builder.SetInsertPoint(EntryBB);
+
+  bool HasShareds = StaleCI->arg_size() > 1;
+  // TODO: This is a temporary assert to prove to ourselves that
+  // the outlined target launch function is always going to have
+  // atmost two arguments if there is any data shared between
+  // host and device.
+  assert((!HasShareds || (StaleCI->arg_size() == 2)) &&
+         "StaleCI with shareds should have exactly two arguments.");
+  if (HasShareds) {
+    AllocaInst *ArgStructAlloca =
+        dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+    assert(ArgStructAlloca &&
+           "Unable to find the alloca instruction corresponding to arguments "
+           "for extracted function");
+    StructType *ArgStructType =
+        dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
+    LLVM_DEBUG(dbgs() << "ArgStructType = " << *ArgStructType << "\n");
+
+    AllocaInst *NewArgStructAlloca =
+        Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
+    Value *TaskT = ProxyFn->getArg(1);
+    Value *ThreadId = ProxyFn->getArg(0);
+    LLVM_DEBUG(dbgs() << "TaskT = " << *TaskT << "\n");
+    Value *SharedsSize =
+        Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
+
+    Value *Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
+    LoadInst *LoadShared =
+        Builder.CreateLoad(PointerType::getUnqual(Ctx), Shareds);
+
+    // TODO: Are these alignment values correct?
----------------
Meinersbur wrote:

I think `NewArgStructAlloca->getAlign()` shold be sufficient. If the alloca/load doesn't have the align set explicitly, `memcpy` should apply pointer alignment itself.

https://github.com/llvm/llvm-project/pull/93977


More information about the llvm-commits mailing list