[Mlir-commits] [llvm] [mlir] [Flang][OpenMP][Taskloop] Translation support for taskloop construct (PR #166903)

Kaviya Rajendiran llvmlistbot at llvm.org
Thu Nov 6 23:23:21 PST 2025


https://github.com/kaviya2510 created https://github.com/llvm/llvm-project/pull/166903

Added translation support for the taskloop construct.

>From e89e509c6fd46adf193168b35049420a7e0706a9 Mon Sep 17 00:00:00 2001
From: Kaviya Rajendiran <kaviyara2000 at gmail.com>
Date: Fri, 7 Nov 2025 12:49:53 +0530
Subject: [PATCH] [Flang][OpenMP] Translation support for taskloop construct

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  16 ++
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |   1 +
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 199 ++++++++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 257 +++++++++++++++++-
 mlir/test/Target/LLVMIR/openmp-taskloop.mlir  | 151 ++++++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  15 +-
 6 files changed, 622 insertions(+), 17 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop.mlir

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index b3d7ab4acf303..18828380abd32 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1359,6 +1359,22 @@ class OpenMPIRBuilder {
         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
   };
 
+  /// Generator for `#omp taskloop`
+  ///
+  /// \param Loc The location where the taskloop construct was encountered.
+  /// \param AllocaIP The insertion point to be used for alloca instructions.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param LoopInfo Callback that returns the loop's CanonicalLoopInfo.
+  /// \param LBVal Lowerbound value of loop
+  /// \param UBVal Upperbound value of loop
+  /// \param StepVal Step value of loop
+  /// \param Tied True if the task is tied, false if the task is untied.
+  LLVM_ABI InsertPointOrErrorTy createTaskloop(
+      const LocationDescription &Loc, InsertPointTy AllocaIP,
+      BodyGenCallbackTy BodyGenCB,
+      llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
+      Value *LBVal, Value *UBVal, Value *StepVal, bool Tied = true);
+
   /// Generator for `#omp task`
   ///
   /// \param Loc The location where the task construct was encountered.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 46b3d53a4b408..032495dfe9d61 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -95,6 +95,7 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP
 __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
 __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
 __OMP_STRUCT_TYPE(Task, kmp_task_ompbuilder_t, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr)
+__OMP_STRUCT_TYPE(Taskloop, kmp_task_info, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr, Int64, Int64, Int64)
 __OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
                   Int8, Int8, Int8, Int32, Int32, Int32, Int32, Int32, Int32)
 __OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index fff9a815e5368..e88e722b1370e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1933,6 +1933,205 @@ static Value *emitTaskDependencies(
   return DepArray;
 }
 
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
+    const LocationDescription &Loc, InsertPointTy AllocaIP,
+    BodyGenCallbackTy BodyGenCB,
+    llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> loopInfo,
+    Value *LBVal, Value *UBVal, Value *StepVal, bool Tied) {
+
+  if (!updateToLocation(Loc))
+    return InsertPointTy();
+
+  uint32_t SrcLocStrSize;
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+
+  BasicBlock *TaskloopExitBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.exit");
+  BasicBlock *TaskloopBodyBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.body");
+  BasicBlock *TaskloopAllocaBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.alloca");
+
+  InsertPointTy TaskloopAllocaIP =
+      InsertPointTy(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+  InsertPointTy TaskloopBodyIP =
+      InsertPointTy(TaskloopBodyBB, TaskloopBodyBB->begin());
+
+  if (Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
+    return Err;
+
+  llvm::Expected<llvm::CanonicalLoopInfo *> result = loopInfo();
+  if (!result) {
+    return result.takeError();
+  }
+
+  llvm::CanonicalLoopInfo *CLI = result.get();
+  OutlineInfo OI;
+  OI.EntryBB = TaskloopAllocaBB;
+  OI.OuterAllocaBB = AllocaIP.getBlock();
+  OI.ExitBB = TaskloopExitBB;
+
+  // Add the thread ID argument.
+  SmallVector<Instruction *, 4> ToBeDeleted;
+  // Placeholder thread-ID value passed outside the aggregate argument struct.
+  OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+      Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false));
+
+  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied,
+                      TaskloopAllocaBB, CLI, Loc,
+                      ToBeDeleted](Function &OutlinedFn) mutable {
+    // Replace the stale call to the outlined function with runtime calls.
+    assert(OutlinedFn.hasOneUse() &&
+           "there must be a single user for the outlined function");
+    CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
+
+    // HasShareds is true if any variables are captured in the outlined region,
+    // false otherwise.
+    bool HasShareds = StaleCI->arg_size() > 1;
+    Builder.SetInsertPoint(StaleCI);
+
+    // Gather the arguments for emitting the runtime call for
+    // @__kmpc_omp_task_alloc
+    Function *TaskAllocFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+
+    Value *ThreadID = getOrCreateThreadID(Ident);
+
+    // Emit runtime call for @__kmpc_taskgroup
+    Function *TaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
+    Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
+
+    // The flags are set to 1 if the task is tied, 0 otherwise.
+    Value *Flags = Builder.getInt32(Tied);
+
+    Value *TaskSize = Builder.getInt64(
+        divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8));
+
+    Value *SharedsSize = Builder.getInt64(0);
+    if (HasShareds) {
+      AllocaInst *ArgStructAlloca =
+          dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+      assert(ArgStructAlloca &&
+             "Unable to find the alloca instruction corresponding to arguments "
+             "for extracted function");
+      StructType *ArgStructType =
+          dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
+      assert(ArgStructType && "Unable to find struct type corresponding to "
+                              "arguments for extracted function");
+      SharedsSize =
+          Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
+    }
+
+    // Emit the @__kmpc_omp_task_alloc runtime call
+    // The runtime call returns a pointer to an area where the task captured
+    // variables must be copied before the task is run (TaskData)
+    CallInst *TaskData = Builder.CreateCall(
+        TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
+                      /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
+                      /*task_func=*/&OutlinedFn});
+
+    // Get the pointer to loop lb, ub, step from task ptr
+    // and set up the lowerbound,upperbound and step values
+    llvm::Value *lb =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 5);
+    // NOTE(review): LBVal/UBVal are stored unextended into i64 slots; confirm.
+    Builder.CreateStore(LBVal, lb);
+
+    llvm::Value *ub =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 6);
+    Builder.CreateStore(UBVal, ub);
+
+    llvm::Value *step =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 7);
+    Value *Step_ext = Builder.CreateSExt(StepVal, Builder.getInt64Ty());
+    Builder.CreateStore(Step_ext, step);
+    llvm::Value *loadstep = Builder.CreateLoad(Builder.getInt64Ty(), step);
+
+    if (HasShareds) {
+      Value *Shareds = StaleCI->getArgOperand(1);
+      Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
+      Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
+      Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
+                           SharedsSize);
+    }
+
+    // set up the arguments for emitting kmpc_taskloop runtime call
+    // setting default values for ifval, nogroup, sched, grainsize, task_dup
+    Value *IfVal = Builder.getInt32(1);
+    Value *NoGroup = Builder.getInt32(1);
+    Value *Sched = Builder.getInt32(0);
+    Value *GrainSize = Builder.getInt64(0);
+    Value *TaskDup = Constant::getNullValue(Builder.getPtrTy());
+
+    Value *Args[] = {Ident,    ThreadID, TaskData, IfVal,     lb,     ub,
+                     loadstep, NoGroup,  Sched,    GrainSize, TaskDup};
+
+    // taskloop runtime call
+    Function *TaskloopFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskloop);
+    Builder.CreateCall(TaskloopFn, Args);
+
+    // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
+    Function *EndTaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
+    Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+
+    StaleCI->eraseFromParent();
+
+    Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+
+    if (HasShareds) {
+      LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
+      OutlinedFn.getArg(1)->replaceUsesWithIf(
+          Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
+    }
+
+    Value *IV = CLI->getIndVar();
+    Type *IVTy = IV->getType();
+    Constant *One = ConstantInt::get(IVTy, 1);
+
+    Value *task_lb = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 5, "gep_lb");
+    Value *LowerBound = Builder.CreateLoad(IVTy, task_lb, "lb");
+
+    Value *task_ub = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 6, "gep_ub");
+    Value *UpperBound = Builder.CreateLoad(IVTy, task_ub, "ub");
+
+    Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
+
+    Value *TripCountMinusOne = Builder.CreateSub(UpperBound, LowerBound);
+    Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
+    // set the trip count in the CLI
+    CLI->setTripCount(TripCount);
+
+    Builder.SetInsertPoint(CLI->getBody(),
+                           CLI->getBody()->getFirstInsertionPt());
+
+    llvm::BasicBlock *Body = CLI->getBody();
+    for (llvm::Instruction &I : *Body) {
+      if (auto *Add = llvm::dyn_cast<llvm::BinaryOperator>(&I)) {
+        if (Add->getOpcode() == llvm::Instruction::Add) {
+          if (llvm::isa<llvm::BinaryOperator>(Add->getOperand(0))) {
+            // Use this task's lower bound as the loop's starting index.
+            Add->setOperand(1, LowerBound);
+          }
+        }
+      }
+    }
+
+    for (Instruction *I : llvm::reverse(ToBeDeleted)) {
+      I->eraseFromParent();
+    }
+  };
+
+  addOutlineInfo(std::move(OI));
+  Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->begin());
+  return Builder.saveIP();
+}
+
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8edec990eaaba..d69fcd3db0413 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -323,6 +323,18 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDistScheduleChunkSize())
       result = todo("dist_schedule with chunk_size");
   };
+  auto checkFinal = [&todo](auto op, LogicalResult &result) {
+    if (op.getFinal())
+      result = todo("final");
+  };
+  auto checkGrainsize = [&todo](auto op, LogicalResult &result) {
+    if (op.getGrainsize())
+      result = todo("grainsize");
+  };
+  // The `if` clause is not translated yet. Like `hint` below, it is dropped
+  // with a warning instead of failing the translation, so the LogicalResult
+  // argument is left untouched.
+  auto checkIf = [](auto op, LogicalResult &) {
+    if (op.getIfExpr())
+      op.emitWarning("if clause discarded");
+  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -340,10 +352,22 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
       result = todo("linear");
   };
+  auto checkMergeable = [&todo](auto op, LogicalResult &result) {
+    if (op.getMergeable())
+      result = todo("mergeable");
+  };
+  auto checkNogroup = [&todo](auto op, LogicalResult &result) {
+    if (op.getNogroup())
+      result = todo("nogroup");
+  };
   auto checkNowait = [&todo](auto op, LogicalResult &result) {
     if (op.getNowait())
       result = todo("nowait");
   };
+  auto checkNumTasks = [&todo](auto op, LogicalResult &result) {
+    if (op.getNumTasks())
+      result = todo("num_tasks");
+  };
   auto checkOrder = [&todo](auto op, LogicalResult &result) {
     if (op.getOrder() || op.getOrderMod())
       result = todo("order");
@@ -417,7 +441,15 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkNowait(op, result);
       })
       .Case([&](omp::TaskloopOp op) {
-        // TODO: Add other clauses check
+        checkAllocate(op, result);
+        checkFinal(op, result);
+        checkGrainsize(op, result);
+        checkIf(op, result);
+        checkInReduction(op, result);
+        checkMergeable(op, result);
+        checkNogroup(op, result);
+        checkNumTasks(op, result);
+        checkReduction(op, result);
         checkUntied(op, result);
         checkPriority(op, result);
       })
@@ -2097,6 +2129,8 @@ class TaskContextStructManager {
   /// private decls.
   void createGEPsToPrivateVars();
 
+  llvm::Value *isAllocated();
+
   /// De-allocate the task context structure.
   void freeStructPtr();
 
@@ -2177,13 +2211,26 @@ void TaskContextStructManager::createGEPsToPrivateVars() {
   }
 }
 
+/// Emits a "pointer is not null" check on the task context structure.
+///
+/// \returns the value created by CreateIsNotNull for the structure pointer,
+/// or nullptr (emitting nothing) when no structure pointer has been set.
+llvm::Value *TaskContextStructManager::isAllocated() {
+  return structPtr ? builder.CreateIsNotNull(structPtr) : nullptr;
+}
+
 void TaskContextStructManager::freeStructPtr() {
   if (!structPtr)
     return;
 
   llvm::IRBuilderBase::InsertPointGuard guard{builder};
-  // Ensure we don't put the call to free() after the terminator
-  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+  llvm::BasicBlock *currentBlock = builder.GetInsertBlock();
+  if (currentBlock->getTerminator()) {
+    // Ensure we don't put the call to free() after the terminator
+    builder.SetInsertPoint(currentBlock->getTerminator());
+  } else {
+    // Insert the call to free() at the end of the current block
+    builder.SetInsertPoint(currentBlock);
+  }
   builder.CreateFree(structPtr);
 }
 
@@ -2419,6 +2466,207 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
+///
+/// Privatizers that read from their mold are initialized and copied into the
+/// task context structure before the taskloop is created; the remaining
+/// privatizers are allocated and initialized inside the taskloop body. Only
+/// the first lower bound, upper bound and step of the wrapped omp.loop_nest
+/// are forwarded to OpenMPIRBuilder::createTaskloop(). After the construct,
+/// the private variables are cleaned up and the context structure is freed.
+///
+/// \param opInst The omp.taskloop operation to translate.
+/// \param builder IR builder positioned at the end of the current block.
+/// \param moduleTranslation Translation state used for value/type lookup.
+/// \returns failure() if checkImplementationStatus() rejects the operation or
+/// any translation step reports an error; success() otherwise.
+static LogicalResult
+convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto taskloopOp = cast<omp::TaskloopOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  // Private copies handed to cleanupPrivateVars() once the construct is done.
+  // Slot i belongs to privateVarsInfo.privatizers[i]: both containers are
+  // passed to the cleanup side by side, so they must stay in the same order.
+  SmallVector<llvm::Value *> llvmFirstPrivateVars;
+  PrivateVarsInfo privateVarsInfo(taskloopOp);
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
+                                         privateVarsInfo.privatizers};
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+
+  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
+  llvm::BasicBlock *taskloopStartBlock = llvm::BasicBlock::Create(
+      builder.getContext(), "omp.taskloop.start",
+      /*Parent=*/builder.GetInsertBlock()->getParent());
+  llvm::Instruction *branchToTaskloopStartBlock =
+      builder.CreateBr(taskloopStartBlock);
+  builder.SetInsertPoint(branchToTaskloopStartBlock);
+
+  llvm::BasicBlock *copyBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+  llvm::BasicBlock *initBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
+
+  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+      moduleTranslation, allocaIP);
+
+  // Allocate and initialize private variables
+  builder.SetInsertPoint(initBlock->getTerminator());
+
+  taskStructMgr.generateTaskContextStruct();
+  taskStructMgr.createGEPsToPrivateVars();
+
+  llvmFirstPrivateVars.resize(privateVarsInfo.blockArgs.size());
+
+  for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
+           privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
+           privateVarsInfo.blockArgs,
+           taskStructMgr.getLLVMPrivateVarGEPs()))) {
+    auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] = zip;
+    // To be handled inside the taskloop.
+    if (!privDecl.readsFromMold())
+      continue;
+    assert(llvmPrivateVarAlloc &&
+           "reads from mold so shouldn't have been skipped");
+
+    llvm::Expected<llvm::Value *> privateVarOrErr =
+        initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                       blockArg, llvmPrivateVarAlloc, initBlock);
+    if (!privateVarOrErr)
+      return handleError(privateVarOrErr, *taskloopOp.getOperation());
+
+    // Store at the privatizer's own position (not at a running counter) so
+    // the entry lines up with privateVarsInfo.privatizers during cleanup.
+    llvmFirstPrivateVars[i] = privateVarOrErr.get();
+
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+    if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
+        !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+      builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
+      // Load it so we have the value pointed to by the GEP
+      llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
+                                               llvmPrivateVarAlloc);
+    }
+    assert(llvmPrivateVarAlloc->getType() ==
+           moduleTranslation.convertType(blockArg.getType()));
+  }
+
+  // firstprivate copy region
+  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
+  if (failed(copyFirstPrivateVars(
+          taskloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
+          taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
+          taskloopOp.getPrivateNeedsBarrier())))
+    return llvm::failure();
+
+  // Set up insertion point for call to createTaskloop()
+  builder.SetInsertPoint(taskloopStartBlock);
+
+  auto bodyCB = [&](InsertPointTy allocaIP,
+                    InsertPointTy codegenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // translate the body of the taskloop:
+    builder.restoreIP(codegenIP);
+
+    llvm::BasicBlock *privInitBlock = nullptr;
+    privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
+    for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
+             privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
+             privateVarsInfo.mlirVars))) {
+      auto [blockArg, privDecl, mlirPrivVar] = zip;
+      // This is handled before the task executes
+      if (privDecl.readsFromMold())
+        continue;
+
+      llvm::IRBuilderBase::InsertPointGuard guard(builder);
+      llvm::Type *llvmAllocType =
+          moduleTranslation.convertType(privDecl.getType());
+      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+      llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+          llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+      llvm::Expected<llvm::Value *> privateVarOrError =
+          initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                         blockArg, llvmPrivateVar, privInitBlock);
+      if (!privateVarOrError)
+        return privateVarOrError.takeError();
+      moduleTranslation.mapValue(blockArg, privateVarOrError.get());
+      privateVarsInfo.llvmVars[i] = privateVarOrError.get();
+      // Record the private var in its privatizer's slot for later cleanup.
+      // NOTE(review): this value is created inside the taskloop body while
+      // cleanupPrivateVars() runs after the construct - confirm that is safe
+      // for privatizers with a non-empty cleanup.
+      llvmFirstPrivateVars[i] = privateVarOrError.get();
+    }
+
+    taskStructMgr.createGEPsToPrivateVars();
+    for (auto [i, llvmPrivVar] :
+         llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
+      if (!llvmPrivVar) {
+        assert(privateVarsInfo.llvmVars[i] &&
+               "This is added in the loop above");
+        continue;
+      }
+      privateVarsInfo.llvmVars[i] = llvmPrivVar;
+    }
+
+    // Find and map the addresses of each variable within the taskloop context
+    // structure
+    for (auto [blockArg, llvmPrivateVar, privateDecl] :
+         llvm::zip_equal(privateVarsInfo.blockArgs, privateVarsInfo.llvmVars,
+                         privateVarsInfo.privatizers)) {
+      // This was handled above.
+      if (!privateDecl.readsFromMold())
+        continue;
+      // Fix broken pass-by-value case for Fortran character boxes
+      if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+        llvmPrivateVar = builder.CreateLoad(
+            moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar);
+      }
+      assert(llvmPrivateVar->getType() ==
+             moduleTranslation.convertType(blockArg.getType()));
+      moduleTranslation.mapValue(blockArg, llvmPrivateVar);
+    }
+
+    auto continuationBlockOrError =
+        convertOmpOpRegions(taskloopOp.getRegion(), "omp.taskloop.region",
+                            builder, moduleTranslation);
+    if (failed(handleError(continuationBlockOrError, opInst)))
+      return llvm::make_error<PreviouslyReportedError>();
+
+    builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
+
+    // dummy check to ensure that the task context structure is accessed inside
+    // the outlined fn. Only the emitted instruction matters; the returned
+    // value is intentionally discarded.
+    (void)taskStructMgr.isAllocated();
+    return llvm::Error::success();
+  };
+
+  auto loopOp = cast<omp::LoopNestOp>(taskloopOp.getWrappedLoop());
+
+  // Invoked by createTaskloop() after the body callback has run.
+  auto loopInfo = [&]() -> llvm::Expected<llvm::CanonicalLoopInfo *> {
+    return findCurrentLoopInfo(moduleTranslation);
+  };
+
+  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder.createTaskloop(
+          ompLoc, allocaIP, bodyCB, loopInfo,
+          moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[0]),
+          moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[0]),
+          moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]));
+
+  if (failed(handleError(afterIP, opInst)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+
+  // freeing the task context structure in exit block of taskloop.
+  if (failed(cleanupPrivateVars(builder, moduleTranslation, taskloopOp.getLoc(),
+                                llvmFirstPrivateVars,
+                                privateVarsInfo.privatizers)))
+    return failure();
+
+  taskStructMgr.freeStructPtr();
+
+  return success();
+}
+
 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
@@ -6224,6 +6472,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
           .Case([&](omp::TaskOp op) {
             return convertOmpTaskOp(op, builder, moduleTranslation);
           })
+          .Case([&](omp::TaskloopOp op) {
+            return convertOmpTaskloopOp(*op, builder, moduleTranslation);
+          })
           .Case([&](omp::TaskgroupOp op) {
             return convertOmpTaskgroupOp(op, builder, moduleTranslation);
           })
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
new file mode 100644
index 0000000000000..536a1fe9d9157
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
@@ -0,0 +1,151 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK:  %struct.kmp_task_info = type { ptr, ptr, i32, ptr, ptr, i64, i64, i64 }
+
+// CHECK-LABEL:  define void @_QPtest() {
+// CHECK:           %[[STRUCTARG:.*]] = alloca { ptr }, align 8
+// CHECK:           %[[VAL1:.*]] = alloca i32, i64 1, align 4
+// CHECK:           %[[VAL_X:.*]] = alloca i32, i64 1, align 4
+// CHECK:           store i32 20, ptr %[[VAL_X]], align 4
+// CHECK:           br label %entry
+
+// CHECK:         entry:
+// CHECK:           br label %omp.private.init
+
+// CHECK:         omp.private.init:                                 ; preds = %entry
+// CHECK:           %[[OMP_TASK_CONTEXT_PTR:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:           %[[PRIV_GEP:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
+// CHECK:           br label %omp.private.copy
+
+// CHECK:         omp.private.copy:
+// CHECK:           br label %omp.private.copy1
+
+// CHECK:         omp.private.copy1:
+// CHECK:           %[[LOAD_X:.*]] = load i32, ptr %[[VAL_X]], align 4
+// CHECK:           store i32 %[[LOAD_X]], ptr %[[PRIV_GEP]], align 4
+// CHECK:           br label %omp.taskloop.start
+
+// CHECK:         omp.taskloop.start:
+// CHECK:           br label %codeRepl
+
+// The call site of the outlined function: taskgroup, task allocation, loop
+// bounds setup, shared-data copy and the taskloop runtime call.
+// CHECK:         codeRepl:
+// CHECK:           %[[GEP_OMP_TASK_CONTEXT_PTR:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0
+// CHECK:           store ptr %[[OMP_TASK_CONTEXT_PTR]], ptr %[[GEP_OMP_TASK_CONTEXT_PTR]], align 8
+// CHECK:           %[[GTID:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:           call void @__kmpc_taskgroup(ptr @1, i32 %[[GTID]])
+// CHECK:           %[[TASK_PTR:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[GTID]], i32 1, i64 64, i64 8, ptr @_QPtest..omp_par)
+// CHECK:           %[[LB_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_task_info, ptr %[[TASK_PTR]], i32 0, i32 5
+// CHECK:           store i32 1, ptr %[[LB_GEP]], align 4
+// CHECK:           %[[UB_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_task_info, ptr %[[TASK_PTR]], i32 0, i32 6
+// CHECK:           store i32 5, ptr %[[UB_GEP]], align 4
+// CHECK:           %[[STEP_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_task_info, ptr %[[TASK_PTR]], i32 0, i32 7
+// CHECK:           store i64 1, ptr %[[STEP_GEP]], align 4
+// CHECK:           %[[LOAD_STEP:.*]] = load i64, ptr %[[STEP_GEP]], align 4
+// CHECK:           %[[TASK_SHAREDS:.*]] = load ptr, ptr %[[TASK_PTR]], align 8
+// CHECK:           call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[TASK_SHAREDS]], ptr align 1 %[[STRUCTARG]], i64 8, i1 false)
+// CHECK:           call void @__kmpc_taskloop(ptr @1, i32 %[[GTID]], ptr %[[TASK_PTR]], i32 1, ptr %[[LB_GEP]], ptr %[[UB_GEP]], i64 %[[LOAD_STEP]], i32 1, i32 0, i64 0, ptr null)
+// CHECK:           call void @__kmpc_end_taskgroup(ptr @1, i32 %[[GTID]])
+// CHECK:           br label %taskloop.exit
+
+// CHECK:           taskloop.exit:
+// CHECK:             tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])
+// CHECK:             ret void
+// CHECK:           }
+
+// CHECK-LABEL:   define internal void @_QPtest..omp_par
+// CHECK-SAME:       i32 %[[GLOBAL_TID:.*]], ptr %[[TASK_PTR1:.*]]) {
+// CHECK:           taskloop.alloca:
+// CHECK:           %[[LOAD_TASK_PTR:.*]] = load ptr, ptr %[[TASK_PTR1]], align 8
+// CHECK:           %[[GEP_LB:.*]] = getelementptr inbounds nuw %struct.kmp_task_info, ptr %[[TASK_PTR1]], i32 0, i32 5
+// CHECK:           %[[LB:.*]] = load i32, ptr %[[GEP_LB]], align 4
+// CHECK:           %[[GEP_UB:.*]] = getelementptr inbounds nuw %struct.kmp_task_info, ptr %[[TASK_PTR1]], i32 0, i32 6
+// CHECK:           %[[UB:.*]] = load i32, ptr %[[GEP_UB]], align 4
+// CHECK:           %[[GEP_OMP_TASK_CONTEXT_PTR:.*]] = getelementptr { ptr }, ptr %[[LOAD_TASK_PTR]], i32 0, i32 0
+// CHECK:           %[[LOADGEP_OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[GEP_OMP_TASK_CONTEXT_PTR]], align 8, !align !1
+// CHECK:           %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
+// CHECK:           br label %taskloop.body
+
+// CHECK:           taskloop.body:
+// CHECK:             %[[LOAD_X:.*]] = getelementptr { i32 }, ptr %[[LOADGEP_OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
+// CHECK:             br label %omp.taskloop.region
+
+// CHECK:           omp.taskloop.region:
+// CHECK:             br label %omp_loop.preheader
+
+// CHECK:           omp_loop.preheader:
+// CHECK:             %[[VAL2:.*]] = sub i32 %[[UB]], %[[LB]]
+// CHECK:             %[[TRIP_CNT:.*]] = add i32 %[[VAL2]], 1
+// CHECK:             br label %omp_loop.header
+
+// CHECK:           omp_loop.header:
+// CHECK:             %[[OMP_LOOP_IV:.*]] = phi i32 [ 0, %omp_loop.preheader ], [ %omp_loop.next, %omp_loop.inc ]
+// CHECK:             br label %omp_loop.cond
+
+// CHECK:           omp_loop.cond:
+// CHECK:             %[[OMP_LOOP_CMP:.*]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[TRIP_CNT]]
+// CHECK:             br i1 %[[OMP_LOOP_CMP]], label %omp_loop.body, label %omp_loop.exit
+
+// CHECK:           omp_loop.exit:
+// CHECK:             br label %omp_loop.after
+
+// CHECK:           omp_loop.after:
+// CHECK:             br label %omp.region.cont
+
+// CHECK:           omp.region.cont:
+// CHECK:             %[[IS_ALLOCATED:.*]] = icmp ne ptr %[[LOADGEP_OMP_TASK_CONTEXT_PTR]], null
+// CHECK:             br label %taskloop.exit.exitStub
+
+// CHECK:           omp_loop.body:
+// CHECK:             %[[VAL3:.*]] = mul i32 %[[OMP_LOOP_IV]], 1
+// CHECK:             %[[VAL5:.*]] = add i32 %[[VAL3]], %[[LB]]
+// CHECK:             br label %omp.loop_nest.region
+
+// CHECK:           omp.loop_nest.region:
+// CHECK:             store i32 %[[VAL5]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
+// CHECK:             %[[VAL6:.*]] = load i32, ptr %[[LOAD_X]], align 4
+// CHECK:             %[[RES:.*]] = add i32 %[[VAL6]], 1
+// CHECK:             store i32 %[[RES]], ptr %[[LOAD_X]], align 4
+// CHECK:             br label %omp.region.cont2
+
+// CHECK:           omp.region.cont2:
+// CHECK:             br label %omp_loop.inc
+
+// CHECK:           omp_loop.inc:
+// CHECK:             %omp_loop.next = add nuw i32 %[[OMP_LOOP_IV]], 1
+// CHECK:             br label %omp_loop.header
+
+// CHECK:           taskloop.exit.exitStub:
+// CHECK:             ret void
+// CHECK:           }
\ No newline at end of file
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index af6d254cfd3c3..d33cb7e4708b4 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -360,21 +360,8 @@ llvm.func @taskgroup_task_reduction(%x : !llvm.ptr) {
 
 // -----
 
-llvm.func @taskloop(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error at below {{not yet implemented: omp.taskloop}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
 llvm.func @taskloop_untied(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error at below {{not yet implemented: omp.taskloop}}
+  // expected-error at below {{not yet implemented: Unhandled clause untied in omp.taskloop operation}}
   // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
   omp.taskloop untied {
     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {



More information about the Mlir-commits mailing list