[llvm-branch-commits] [OpenMP] Support 'taskgraph' semantics via new libomp API entry points (PR #194051)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Apr 24 13:55:21 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-openmp

Author: Julian Brown (jtb20)

<details>
<summary>Changes</summary>

This patch supports the OpenMP 6.0 'taskgraph' directive via several
new API entry points for libomp.

New entry points are used for several reasons.  The first reason
is to pass extra information from the compiler relevant to the
taskgraph-recording case -- e.g. the __kmpc_taskgraph_task entry point
has extra arguments relating to shared data.  The second reason is to
reduce the potential overhead of the taskgraph implementation on the
rest of the runtime.  The third reason is to capture OpenMP semantics
at a slightly higher level: in particular, when offload target tasks are
added to this implementation, they will also use new API entry points,
with the aim of allowing dependencies to be handled entirely on the GPU
rather than by wrapping each target task in a host task.

commit-id:87387b9b


---

Patch is 39.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/194051.diff


5 Files Affected:

- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+474-205) 
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.h (+3-1) 
- (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+7-5) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+16) 
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+11) 


``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 369de6f05b098..5361e830dcab3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -33,6 +33,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Frontend/OpenMP/OMP.h.inc"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalValue.h"
@@ -2249,24 +2250,100 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF,
   if (!CGF.HaveInsertPoint())
     return;
 
+  // The nogroup clause doesn't support an argument yet.  FIXME.
+  const OMPNogroupClause *NoGroupClause =
+    D.getSingleClause<OMPNogroupClause>();
+  llvm::Value *NoGroup;
+  if (NoGroupClause) {
+    NoGroup = CGF.Builder.getInt32(1);
+  } else {
+    NoGroup = CGF.Builder.getInt32(0);
+  }
+
+  const OMPGraphResetClause *GraphResetClause =
+      D.getSingleClause<OMPGraphResetClause>();
+  llvm::Value *GraphReset;
+  if (GraphResetClause) {
+    const Expr *Cond = GraphResetClause->getCondition();
+    llvm::Value *CondVal = CGF.EvaluateExprAsBool(Cond);
+    GraphReset =
+      CGF.Builder.CreateIntCast(CondVal, CGF.IntTy, /*isSigned=*/true);
+  } else {
+    GraphReset = CGF.Builder.getInt32(0);
+  }
+
+  llvm::Value *GraphId = CGF.Builder.getInt32(0);
+  const OMPGraphIdClause *GraphIdClause = D.getSingleClause<OMPGraphIdClause>();
+  if (GraphIdClause) {
+    const auto *E = GraphIdClause->getId();
+    auto *GraphIdVal = CGF.EmitScalarExpr(E);
+    GraphId =
+      CGF.Builder.CreateIntCast(GraphIdVal, CGM.Int32Ty, /*isSigned=*/false);
+  }
+
   CodeGenFunction OutlinedCGF(CGM, /*suppressNewContext=*/true);
 
   const auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+
+  auto BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+    CodeGenFunction::OMPWithinTaskgraphRAII WithinTaskgraph(CGF);
+    CGF.EmitStmt(CS->getCapturedStmt());
+  };
+
   LValue CapStruct = CGF.InitCapturedStruct(*CS);
+  CGOpenMPTaskgraphRegionInfo TaskgraphRegion(*CS, BodyGen);
+  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(OutlinedCGF,
+                                                  &TaskgraphRegion);
 
   llvm::Function *FnT = OutlinedCGF.GenerateCapturedStmtFunction(*CS);
 
-  llvm::Value *CapturedArgsPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      CapStruct.getPointer(OutlinedCGF), CGM.VoidPtrTy);
+  // Create an internal-linkage global variable to hold the taskgraph handle.
+  std::string GraphHandleName = getName({"omp", "taskgraph", "handle"});
+  auto *GraphHandle =
+    new llvm::GlobalVariable(CGM.getModule(), CGM.VoidPtrTy,
+                             /*IsConstant=*/false,
+                             llvm::GlobalValue::InternalLinkage,
+                             llvm::Constant::getNullValue(CGM.VoidPtrTy),
+                             GraphHandleName);
 
-  auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
-    Action.Enter(CGF);
+  std::array<llvm::Value *, 8> Args{
+      emitUpdateLocation(CGF, Loc),
+      getThreadID(CGF, Loc),
+      GraphHandle,
+      GraphId,
+      GraphReset,
+      NoGroup,
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(FnT, CGM.VoidPtrTy),
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          CapStruct.getPointer(OutlinedCGF), CGM.VoidPtrTy)};
+
+  auto &&ThenGen = [&CGF, this, &Args](CodeGenFunction &, PrePostActionTy &) {
+    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+          CGM.getModule(), OMPRTL___kmpc_taskgraph),
+        Args);
+  };
+  auto &&ElseGen = [&CGF, this, &FnT, &CapStruct, &Loc, &OutlinedCGF]
+    (CodeGenFunction &, PrePostActionTy &) {
+      llvm::Value *CapturedArgsPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          CapStruct.getPointer(OutlinedCGF), CGM.VoidPtrTy);
+
+    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
+      Action.Enter(CGF);
       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc,
-        FnT,
-        CapturedArgsPtr);
+          FnT,
+          CapturedArgsPtr);
+    };
+    RegionCodeGenTy RCG(CodeGen);
+    RCG(CGF);
   };
-  RegionCodeGenTy RCG(CodeGen);
-  RCG(CGF);
+
+  if (IfCond) {
+    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
+  } else {
+    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                            CGM.getModule(), OMPRTL___kmpc_taskgraph),
+                        Args);
+  }
 }
 
 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
@@ -3805,7 +3882,9 @@ CGOpenMPRuntime::TaskResultTy
 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                               const OMPExecutableDirective &D,
                               llvm::Function *TaskFunction, QualType SharedsTy,
-                              Address Shareds, const OMPTaskDataTy &Data) {
+                              Address Shareds, const OMPTaskDataTy &Data,
+                              bool ForTaskgraph,
+                              std::array<llvm::Value *, 3> &TaskAllocArgs) {
   ASTContext &C = CGM.getContext();
   llvm::SmallVector<PrivateDataTy, 4> Privates;
   // Aggregate privates and sort them by the alignment.
@@ -3952,6 +4031,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
           TaskEntry, KmpRoutineEntryPtrTy)};
   llvm::Value *NewTask;
+  if (ForTaskgraph) {
+    TaskAllocArgs[0] = TaskFlags;
+    TaskAllocArgs[1] = KmpTaskTWithPrivatesTySize;
+    TaskAllocArgs[2] = SharedsSize;
+  }
   if (D.hasClausesOfKind<OMPNowaitClause>()) {
     // Check if we have any device clause associated with the directive.
     const Expr *Device = nullptr;
@@ -4709,114 +4793,168 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
   if (!CGF.HaveInsertPoint())
     return;
 
-  TaskResultTy Result =
-      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
-  llvm::Value *NewTask = Result.NewTask;
-  llvm::Function *TaskEntry = Result.TaskEntry;
-  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
-  LValue TDBase = Result.TDBase;
-  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
-  // Process list of dependences.
-  Address DependenciesArray = Address::invalid();
-  llvm::Value *NumOfElements;
-  std::tie(NumOfElements, DependenciesArray) =
-      emitDependClause(CGF, Data.Dependences, Loc);
-
-  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
-  // libcall.
-  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
-  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
-  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
-  // list is not empty
-  llvm::Value *ThreadID = getThreadID(CGF, Loc);
-  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
-  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
-  llvm::Value *DepTaskArgs[7];
-  if (!Data.Dependences.empty()) {
-    DepTaskArgs[0] = UpLoc;
-    DepTaskArgs[1] = ThreadID;
-    DepTaskArgs[2] = NewTask;
-    DepTaskArgs[3] = NumOfElements;
-    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
-    DepTaskArgs[5] = CGF.Builder.getInt32(0);
-    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
-  }
-  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
-                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
-    if (!Data.Tied) {
-      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
-      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
-      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
+  auto &&TaskgraphTaskCodeGen =
+      [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, &Data]
+        (CodeGenFunction &CGF, PrePostActionTy &) {
+    llvm::Value *ThreadId = getThreadID(CGF, Loc);
+    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+    std::array<llvm::Value *, 9> TGTaskArgs;
+    std::array<llvm::Value *, 3> TaskAllocArgs;
+    TaskResultTy Result =
+        emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, true,
+                      TaskAllocArgs);
+    Address DependenciesArray = Address::invalid();
+    llvm::Value *NumOfElements;
+    std::tie(NumOfElements, DependenciesArray) =
+        emitDependClause(CGF, Data.Dependences, Loc);
+    TGTaskArgs[0] = UpLoc;
+    TGTaskArgs[1] = ThreadId;
+    TGTaskArgs[2] = Result.NewTask;
+    TGTaskArgs[3] = TaskAllocArgs[0]; // TaskFlags
+    TGTaskArgs[4] = TaskAllocArgs[1]; // KmpTaskTWithPrivatesTySize
+    TGTaskArgs[5] = Shareds.emitRawPointer(CGF);
+    TGTaskArgs[6] = TaskAllocArgs[2]; // SharedsSize
+    if (auto RecType = dyn_cast<RecordType>(SharedsTy)) {
+      auto *RD = RecType->getAsRecordDecl();
+      if (RD->fields().empty()) {
+        // FIXME: The condition might not be precisely correct here.
+        TGTaskArgs[6] = CGF.Builder.getSize(0);
+      }
     }
-    if (!Data.Dependences.empty()) {
-      CGF.EmitRuntimeCall(
-          OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
-          DepTaskArgs);
+    if (Data.Dependences.size() == 0) {
+      TGTaskArgs[7] = CGF.Builder.getInt32(0);
+      TGTaskArgs[8] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
     } else {
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                              CGM.getModule(), OMPRTL___kmpc_omp_task),
-                          TaskArgs);
+      TGTaskArgs[7] = NumOfElements;
+      TGTaskArgs[8] = DependenciesArray.emitRawPointer(CGF);
     }
-    // Check if parent region is untied and build return for untied task;
-    if (auto *Region =
-            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
-      Region->emitUntiedSwitch(CGF);
+    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+          CGM.getModule(), OMPRTL___kmpc_taskgraph_task),
+        TGTaskArgs);
   };
 
-  llvm::Value *DepWaitTaskArgs[7];
-  if (!Data.Dependences.empty()) {
-    DepWaitTaskArgs[0] = UpLoc;
-    DepWaitTaskArgs[1] = ThreadID;
-    DepWaitTaskArgs[2] = NumOfElements;
-    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
-    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
-    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
-    DepWaitTaskArgs[6] =
-        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
-  }
-  auto &M = CGM.getModule();
-  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
-                        TaskEntry, &Data, &DepWaitTaskArgs,
-                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
-    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
-    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
-    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
-    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
-    // is specified.
-    if (!Data.Dependences.empty())
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
-                          DepWaitTaskArgs);
-    // Call proxy_task_entry(gtid, new_task);
-    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
-                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
-      Action.Enter(CGF);
-      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
-      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
-                                                          OutlinedFnArgs);
+  auto &&NonTaskgraphTaskCodeGen =
+      [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, IfCond, &Data]
+        (CodeGenFunction &CGF, PrePostActionTy &) {
+    std::array<llvm::Value*, 3> DummyArray;
+    TaskResultTy Result =
+        emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, false, DummyArray);
+    llvm::Value *NewTask = Result.NewTask;
+    llvm::Function *TaskEntry = Result.TaskEntry;
+    llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
+    LValue TDBase = Result.TDBase;
+    const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
+    // Process list of dependences.
+    Address DependenciesArray = Address::invalid();
+    llvm::Value *NumOfElements;
+    std::tie(NumOfElements, DependenciesArray) =
+        emitDependClause(CGF, Data.Dependences, Loc);
+
+    // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
+    // libcall.
+    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
+    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
+    // list is not empty
+    llvm::Value *ThreadID = getThreadID(CGF, Loc);
+    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+    llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
+    llvm::Value *DepTaskArgs[7];
+    if (!Data.Dependences.empty()) {
+      DepTaskArgs[0] = UpLoc;
+      DepTaskArgs[1] = ThreadID;
+      DepTaskArgs[2] = NewTask;
+      DepTaskArgs[3] = NumOfElements;
+      DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
+      DepTaskArgs[5] = CGF.Builder.getInt32(0);
+      DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+    }
+    auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
+                          &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
+      if (!Data.Tied) {
+        auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
+        LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
+        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
+      }
+      if (!Data.Dependences.empty()) {
+        CGF.EmitRuntimeCall(
+            OMPBuilder.getOrCreateRuntimeFunction(
+                CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
+            DepTaskArgs);
+      } else {
+        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                                CGM.getModule(), OMPRTL___kmpc_omp_task),
+                            TaskArgs);
+      }
+      // Check if parent region is untied and build return for untied task;
+      if (auto *Region =
+              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+        Region->emitUntiedSwitch(CGF);
     };
 
-    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
-    // kmp_task_t *new_task);
-    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
-    // kmp_task_t *new_task);
-    RegionCodeGenTy RCG(CodeGen);
-    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
-                              M, OMPRTL___kmpc_omp_task_begin_if0),
-                          TaskArgs,
-                          OMPBuilder.getOrCreateRuntimeFunction(
-                              M, OMPRTL___kmpc_omp_task_complete_if0),
-                          TaskArgs);
-    RCG.setAction(Action);
-    RCG(CGF);
+    llvm::Value *DepWaitTaskArgs[7];
+    if (!Data.Dependences.empty()) {
+      DepWaitTaskArgs[0] = UpLoc;
+      DepWaitTaskArgs[1] = ThreadID;
+      DepWaitTaskArgs[2] = NumOfElements;
+      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
+      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
+      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+      DepWaitTaskArgs[6] =
+          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
+    }
+    auto &M = CGM.getModule();
+    auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
+                          TaskEntry, &Data, &DepWaitTaskArgs,
+                          Loc](CodeGenFunction &CGF, PrePostActionTy &) {
+      CodeGenFunction::RunCleanupsScope LocalScope(CGF);
+      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
+      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
+      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
+      // is specified.
+      if (!Data.Dependences.empty())
+        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                                M, OMPRTL___kmpc_omp_taskwait_deps_51),
+                            DepWaitTaskArgs);
+      // Call proxy_task_entry(gtid, new_task);
+      auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
+                        Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
+        Action.Enter(CGF);
+        llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
+        CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
+                                                            OutlinedFnArgs);
+      };
+
+      // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
+      // kmp_task_t *new_task);
+      // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+      // kmp_task_t *new_task);
+      RegionCodeGenTy RCG(CodeGen);
+      CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
+                                M, OMPRTL___kmpc_omp_task_begin_if0),
+                            TaskArgs,
+                            OMPBuilder.getOrCreateRuntimeFunction(
+                                M, OMPRTL___kmpc_omp_task_complete_if0),
+                            TaskArgs);
+      RCG.setAction(Action);
+      RCG(CGF);
+    };
+
+    if (IfCond) {
+      emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
+    } else {
+      RegionCodeGenTy ThenRCG(ThenCodeGen);
+      ThenRCG(CGF);
+    }
   };
 
-  if (IfCond) {
-    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
+  if (CGF.getOMPWithinTaskgraph()) {
+    // Lexically within taskgraph, always replayable.
+    RegionCodeGenTy TaskgraphRCG(TaskgraphTaskCodeGen);
+    TaskgraphRCG(CGF);
   } else {
-    RegionCodeGenTy ThenRCG(ThenCodeGen);
-    ThenRCG(CGF);
+    RegionCodeGenTy NonTaskgraphRCG(NonTaskgraphTaskCodeGen);
+    NonTaskgraphRCG(CGF);
   }
 }
 
@@ -4828,15 +4966,12 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                        const OMPTaskDataTy &Data) {
   if (!CGF.HaveInsertPoint())
     return;
-  TaskResultTy Result =
-      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
-  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
-  // libcall.
-  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
-  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
-  // sched, kmp_uint64 grainsize, void *task_dup);
-  llvm::Value *ThreadID = getThreadID(CGF, Loc);
-  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+
+  std::array<llvm::Value *, 3> TaskAllocArgs;
+  TaskResultTy TaskInitResult =
+      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, true,
+                    TaskAllocArgs);
+
   llvm::Value *IfVal;
   if (IfCond) {
     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
@@ -4845,68 +4980,165 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
   }
 
-  LValue LBLVal = CGF.EmitLValueForField(
-      Result.TDBase,
-      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
-  const auto *LBVar =
-      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
-  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
-                       /*IsInitializer=*/true);
-  LValue UBLVal = CGF.EmitLValueForField(
-      Result.TDBase,
-      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
-  const auto *UBVar =
-      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
-  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
-                       /*IsInitializer=*/true);
-  LValue StLVal = CGF.EmitLValueForField(
-      Result.TDBase,
-      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
-  const auto *StVar =
-      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl(...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/194051


More information about the llvm-branch-commits mailing list