[llvm-branch-commits] [llvm] [mlir] [MLIR][OpenMP] Add Initial Taskloop Clause Support (PR #174623)

Jack Styles via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 7 01:09:30 PST 2026


https://github.com/Stylie777 updated https://github.com/llvm/llvm-project/pull/174623

>From 3a51d73ac784f9cfc07bf02545820e390d60eb0b Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Fri, 19 Dec 2025 10:47:10 +0000
Subject: [PATCH 1/2] [MLIR][OpenMP] Add Initial Taskloop Clause Support

Following on from the work to implement MLIR -> LLVM IR
Translation for Taskloop, this adds support for the following
clauses to be used alongside taskloop:
- if
- grainsize
- num_tasks
- untied
- nogroup
- final
- mergeable
- priority

These clauses map directly onto arguments of the relevant OpenMP
runtime functions, so their values only need to be collected from the
MLIR operation and passed through to the appropriate runtime call.

Remaining clauses retain their TODO message as they have not yet
been implemented.
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  23 ++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  58 +++++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  53 +++---
 .../Target/LLVMIR/openmp-taskloop-final.mlir  |  88 ++++++++++
 .../LLVMIR/openmp-taskloop-grainsize.mlir     |  88 ++++++++++
 .../Target/LLVMIR/openmp-taskloop-if.mlir     |  93 ++++++++++
 .../LLVMIR/openmp-taskloop-mergeable.mlir     |  87 ++++++++++
 .../openmp-taskloop-no-context-struct.mlir    |   2 +-
 .../LLVMIR/openmp-taskloop-nogroup.mlir       |  87 ++++++++++
 .../LLVMIR/openmp-taskloop-num_tasks.mlir     |  88 ++++++++++
 .../LLVMIR/openmp-taskloop-priority.mlir      |  88 ++++++++++
 .../Target/LLVMIR/openmp-taskloop-untied.mlir | 161 ++++++++++++++++++
 mlir/test/Target/LLVMIR/openmp-taskloop.mlir  |   2 +-
 mlir/test/Target/LLVMIR/openmp-todo.mlir      | 104 -----------
 14 files changed, 863 insertions(+), 159 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 7ba7a05ac71a3..a0b26963353b1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1464,7 +1464,22 @@ class OpenMPIRBuilder {
   /// \param LBVal Lowerbound value of loop
   /// \param UBVal Upperbound value of loop
   /// \param StepVal Step value of loop
-  /// \param Tied True if the task is tied, false if the task is untied.
+  /// \param Untied True if the task is untied, false if the task is tied.
+  /// \param IfCond i1 value. If it evaluates to `false`, an undeferred
+  ///               task is generated, and the encountering thread must
+  ///               suspend the current task region, for which execution
+  ///               cannot be resumed until execution of the structured
+  ///               block that is associated with the generated task is
+  ///               completed.
+  /// \param GrainSize Value of the GrainSize/Num of Tasks if present
+  /// \param NoGroup True if the nogroup clause is present, false if not
+  /// \param Sched 1 if grainsize is defined, 2 if num_tasks is defined,
+  /// otherwise 0
+  /// \param Final i1 value which is `true` if the task is final, `false` if the
+  ///              task is not final.
+  /// \param Mergeable True if the generated tasks are mergeable
+  /// \param Priority `priority-value` specifies the execution order of the
+  ///                 tasks that are generated by the construct
   /// \param DupCB The callback to generate the duplication code. See
   /// documentation for \ref TaskDupCallbackTy. This can be nullptr.
   /// \param TaskContextStructPtrVal If non-null, a pointer to  to be placed
@@ -1474,8 +1489,10 @@ class OpenMPIRBuilder {
       const LocationDescription &Loc, InsertPointTy AllocaIP,
       BodyGenCallbackTy BodyGenCB,
       llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
-      Value *LBVal, Value *UBVal, Value *StepVal, bool Tied = true,
-      TaskDupCallbackTy DupCB = nullptr,
+      Value *LBVal, Value *UBVal, Value *StepVal, bool Untied = false,
+      Value *IfCond = nullptr, Value *GrainSize = nullptr, bool NoGroup = false,
+      int Sched = 0, Value *Final = nullptr, bool Mergeable = false,
+      Value *Priority = nullptr, TaskDupCallbackTy DupCB = nullptr,
       Value *TaskContextStructPtrVal = nullptr);
 
   /// Generator for `#omp task`
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a71bcb0bb5392..580daed4f7d10 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2096,8 +2096,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB,
     llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
-    Value *LBVal, Value *UBVal, Value *StepVal, bool Tied,
-    TaskDupCallbackTy DupCB, Value *TaskContextStructPtrVal) {
+    Value *LBVal, Value *UBVal, Value *StepVal, bool Untied, Value *IfCond,
+    Value *GrainSize, bool NoGroup, int Sched, Value *Final, bool Mergeable,
+    Value *Priority, TaskDupCallbackTy DupCB, Value *TaskContextStructPtrVal) {
 
   if (!updateToLocation(Loc))
     return InsertPointTy();
@@ -2172,9 +2173,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
   }
   Value *TaskDupFn = *TaskDupFnOrErr;
 
-  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied,
+  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Untied,
                       TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
-                      FakeLB, FakeUB, FakeStep](Function &OutlinedFn) mutable {
+                      IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
+                      FakeStep, Final, Mergeable,
+                      Priority](Function &OutlinedFn) mutable {
     // Replace the Stale CI by appropriate RTL function call.
     assert(OutlinedFn.hasOneUse() &&
            "there must be a single user for the outlined function");
@@ -2207,8 +2210,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
         getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
     Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
 
-    // The flags are set to 1 if the task is tied, 0 otherwise.
-    Value *Flags = Builder.getInt32(Tied);
+    // `flags` Argument Configuration
+    // Task is tied if (Flags & 1) == 1.
+    // Task is untied if (Flags & 1) == 0.
+    // Task is final if (Flags & 2) == 2.
+    // Task is not final if (Flags & 2) == 0.
+    // Task is mergeable if (Flags & 4) == 4.
+    // Task is not mergeable if (Flags & 4) == 0.
+    // Task is priority if (Flags & 32) == 32.
+    // Task is not priority if (Flags & 32) == 0.
+    Value *Flags = Builder.getInt32(Untied ? 0 : 1);
+    if (Final)
+      Flags = Builder.CreateOr(Builder.getInt32(2), Flags);
+    if (Mergeable)
+      Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
+    if (Priority)
+      Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
 
     Value *TaskSize = Builder.getInt64(
         divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
@@ -2251,25 +2268,32 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
     llvm::Value *Loadstep = Builder.CreateLoad(Builder.getInt64Ty(), Step);
 
     // set up the arguments for emitting kmpc_taskloop runtime call
-    // setting default values for ifval, nogroup, sched, grainsize, task_dup
-    Value *IfVal = Builder.getInt32(1);
-    Value *NoGroup = Builder.getInt32(1);
-    Value *Sched = Builder.getInt32(0);
-    Value *GrainSize = Builder.getInt64(0);
+    // setting values for ifval, nogroup, sched, grainsize, task_dup
+    Value *IfCondVal =
+        IfCond ? Builder.CreateIntCast(IfCond, Builder.getInt32Ty(), true)
+               : Builder.getInt32(1);
+    Value *NoGroupVal = Builder.getInt32(NoGroup ? 1 : 0);
+    Value *SchedVal = Builder.getInt32(Sched);
+    Value *GrainSizeVal =
+        GrainSize ? Builder.CreateIntCast(GrainSize, Builder.getInt64Ty(), true)
+                  : Builder.getInt64(0);
     Value *TaskDup = TaskDupFn;
 
-    Value *Args[] = {Ident,    ThreadID, TaskData, IfVal,     Lb,     Ub,
-                     Loadstep, NoGroup,  Sched,    GrainSize, TaskDup};
+    Value *Args[] = {Ident,    ThreadID,   TaskData, IfCondVal,    Lb,     Ub,
+                     Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
 
     // taskloop runtime call
     Function *TaskloopFn =
         getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskloop);
     Builder.CreateCall(TaskloopFn, Args);
 
-    // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
-    Function *EndTaskgroupFn =
-        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
-    Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+    // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup if
+    // nogroup is not defined
+    if (!NoGroup) {
+      Function *EndTaskgroupFn =
+          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
+      Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+    }
 
     StaleCI->eraseFromParent();
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6f4e9e3b2330a..eef71a17fb41d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -361,22 +361,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getFinal())
       result = todo("final");
   };
-  auto checkGrainsize = [&todo](auto op, LogicalResult &result) {
-    if (op.getGrainsize())
-      result = todo("grainsize");
-  };
-  auto checkIf = [&todo](auto op, LogicalResult &result) {
-    if (op.getIfExpr())
-      result = todo("if");
-  };
-  auto checkMergeable = [&todo](auto op, LogicalResult &result) {
-    if (op.getMergeable())
-      result = todo("mergeable");
-  };
-  auto checkNogroup = [&todo](auto op, LogicalResult &result) {
-    if (op.getNogroup())
-      result = todo("nogroup");
-  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -390,10 +374,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getNowait())
       result = todo("nowait");
   };
-  auto checkNumTasks = [&todo](auto op, LogicalResult &result) {
-    if (op.getNumTasks())
-      result = todo("num_tasks");
-  };
   auto checkOrder = [&todo](auto op, LogicalResult &result) {
     if (op.getOrder() || op.getOrderMod())
       result = todo("order");
@@ -424,10 +404,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         op.getTaskReductionSyms())
       result = todo("task_reduction");
   };
-  auto checkUntied = [&todo](auto op, LogicalResult &result) {
-    if (op.getUntied())
-      result = todo("untied");
-  };
 
   LogicalResult result = success();
   llvm::TypeSwitch<Operation &>(op)
@@ -471,16 +447,8 @@ static LogicalResult checkImplementationStatus(Operation &op) {
       })
       .Case([&](omp::TaskloopOp op) {
         checkAllocate(op, result);
-        checkFinal(op, result);
-        checkGrainsize(op, result);
-        checkIf(op, result);
         checkInReduction(op, result);
-        checkMergeable(op, result);
-        checkNogroup(op, result);
-        checkNumTasks(op, result);
         checkReduction(op, result);
-        checkUntied(op, result);
-        checkPriority(op, result);
       })
       .Case([&](omp::WsloopOp op) {
         checkAllocate(op, result);
@@ -2767,6 +2735,21 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
     return loopInfo;
   };
 
+  llvm::Value *ifCond = nullptr;
+  llvm::Value *grainsize = nullptr;
+  int sched = 0; // default
+  Value grainsizeVal = taskloopOp.getGrainsize();
+  Value numTasksVal = taskloopOp.getNumTasks();
+  if (Value ifVar = taskloopOp.getIfExpr())
+    ifCond = moduleTranslation.lookupValue(ifVar);
+  if (grainsizeVal) {
+    grainsize = moduleTranslation.lookupValue(grainsizeVal);
+    sched = 1; // grainsize
+  } else if (numTasksVal) {
+    grainsize = moduleTranslation.lookupValue(numTasksVal);
+    sched = 2; // num_tasks
+  }
+
   llvm::OpenMPIRBuilder::TaskDupCallbackTy taskDupOrNull = nullptr;
   if (!taskStructMgr.getLLVMPrivateVarGEPs().empty())
     taskDupOrNull = taskDupCB;
@@ -2778,7 +2761,11 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
           moduleTranslation.lookupValue(loopOp.getLoopLowerBounds()[0]),
           moduleTranslation.lookupValue(loopOp.getLoopUpperBounds()[0]),
           moduleTranslation.lookupValue(loopOp.getLoopSteps()[0]),
-          /*Tied=*/true, taskDupOrNull, taskStructMgr.getStructPtr());
+          taskloopOp.getUntied(), ifCond, grainsize, taskloopOp.getNogroup(),
+          sched, moduleTranslation.lookupValue(taskloopOp.getFinal()),
+          taskloopOp.getMergeable(),
+          moduleTranslation.lookupValue(taskloopOp.getPriority()),
+          taskDupOrNull, taskStructMgr.getStructPtr());
 
   if (failed(handleError(afterIP, opInst)))
     return failure();
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
new file mode 100644
index 0000000000000..c1202d1ffab7b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
@@ -0,0 +1,88 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  %true = llvm.mlir.constant(true) : i1
+  omp.taskloop final(%true) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 3, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
new file mode 100644
index 0000000000000..a274e82adba76
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
@@ -0,0 +1,88 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  %c2_i32 = llvm.mlir.constant(2 : i32) : i32
+  omp.taskloop grainsize(%c2_i32: i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %9, align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %5, i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 1, i64 2, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
new file mode 100644
index 0000000000000..d7e16102ae5af
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
@@ -0,0 +1,93 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  %a_val = llvm.load %3 : !llvm.ptr -> i32
+  %c20 = llvm.mlir.constant(20 : i32) : i32
+  %cmp = llvm.icmp "slt" %a_val, %c20 : i32
+  omp.taskloop if(%cmp) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   %[[VAL_3:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   %[[VAL_4:.*]] = icmp slt i32 %[[VAL_3]], 20
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_5:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_6:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_6]], ptr %[[VAL_5]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_7:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_8:.*]] = load ptr, ptr %[[VAL_7]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_8]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_8]], i32 0, i32 0
+// CHECK:   %[[VAL_10:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_8]], i32 0, i32 1
+// CHECK:   %[[VAL_11:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_8]], i32 0, i32 2
+// CHECK:   %[[VAL_12:.*]] = load i64, ptr %[[VAL_11]], align 4
+// CHECK:   %[[IF_VAL:.*]] = sext i1 %4 to i32
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_7]], i32 %[[IF_VAL]], ptr %[[VAL_9]], ptr %[[VAL_10]], i64 %[[VAL_12]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
new file mode 100644
index 0000000000000..54bd15b7d8ffa
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
@@ -0,0 +1,87 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  omp.taskloop mergeable private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 5, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index 43b50e7a3206c..3355a14e38ffd 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -54,7 +54,7 @@ llvm.func @_QPtest() {
 // CHECK:         %[[VAL_18:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 1
 // CHECK:         %[[VAL_19:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 2
 // CHECK:         %[[VAL_20:.*]] = load i64, ptr %[[VAL_19]], align 4
-// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:         call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_14]])
 // CHECK:         br label %[[VAL_21:.*]]
 // CHECK:       taskloop.exit:                                    ; preds = %[[VAL_9]]
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
new file mode 100644
index 0000000000000..7310b17cc04c4
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
@@ -0,0 +1,87 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  omp.taskloop nogroup private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK-NOT:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
new file mode 100644
index 0000000000000..7f0f315ea7c2f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
@@ -0,0 +1,88 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  %c2_i32 = llvm.mlir.constant(2: i32) : i32
+  omp.taskloop num_tasks(%c2_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 2, i64 2, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
new file mode 100644
index 0000000000000..b2735f410cf8f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
@@ -0,0 +1,88 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  %c1_i32 = llvm.mlir.constant(1 : i32) : i32
+  omp.taskloop priority(%c1_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 33, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
new file mode 100644
index 0000000000000..121363f76f900
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
@@ -0,0 +1,161 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @_QFtestEi_private_i32 : i32
+
+omp.private {type = firstprivate} @_QFtestEa_firstprivate_i32 : i32 copy {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  %0 = llvm.load %arg0 : !llvm.ptr -> i32
+  llvm.store %0, %arg1 : i32, !llvm.ptr
+  omp.yield(%arg1 : !llvm.ptr)
+}
+
+
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  omp.taskloop untied private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 0, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
+
+// -----
+
+llvm.func @_QPtest_tied() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %6 = llvm.mlir.constant(20 : i32) : i32
+  llvm.store %6, %3 : i32, !llvm.ptr
+  %7 = llvm.mlir.constant(1 : i32) : i32
+  %8 = llvm.mlir.constant(5 : i32) : i32
+  %9 = llvm.mlir.constant(1 : i32) : i32
+  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+      llvm.store %arg2, %arg1 : i32, !llvm.ptr
+      %10 = llvm.load %arg0 : !llvm.ptr -> i32
+      %11 = llvm.mlir.constant(1 : i32) : i32
+      %12 = llvm.add %10, %11 : i32
+      llvm.store %12, %arg0 : i32, !llvm.ptr
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK: define void @_QPtest_tied() {
+// CHECK:   %[[structArg:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// CHECK:   %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+// CHECK:   %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+// CHECK:   store i32 20, ptr %2, align 4
+// CHECK:   br label %[[entry:.*]]
+
+// CHECK: entry:                                            ; preds = %0
+// CHECK:   br label %[[omp_private_init:.*]]
+
+// CHECK: omp.private.init:                                 ; preds = %[[entry:.*]]
+// CHECK:   %[[ctx_ptr:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64))
+// CHECK:   %[[VAL_3:.*]] = getelementptr { i32 }, ptr %[[ctx_ptr]], i32 0, i32 0
+// CHECK:   br label %[[omp_private_copy:.*]]
+
+// CHECK: omp.private.copy:                                 ; preds = %[[omp_private_init]]
+// CHECK:   br label %[[omp_private_copy1:.*]]
+
+// CHECK: omp.private.copy1:                                ; preds = %[[omp_private_copy]]
+// CHECK:   %[[VAL_4:.*]] = load i32, ptr %[[VAL_2]], align 4
+// CHECK:   store i32 %[[VAL_4]], ptr %[[VAL_3]], align 4
+// CHECK:   br label %[[omp_taskloop_start:.*]]
+
+// CHECK: omp.taskloop.start:                               ; preds = %[[omp_private_copy1]]
+// CHECK:   br label %[[codeRepl:.*]]
+
+// CHECK: codeRepl:                                         ; preds = %[[omp_taskloop_start]]
+// CHECK:   %[[gep_lb_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 0
+// CHECK:   store i64 1, ptr %[[gep_lb_val]], align 4
+// CHECK:   %[[gep_ub_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 1
+// CHECK:   store i64 5, ptr %[[gep_ub_val]], align 4
+// CHECK:   %[[gep_step_val:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 2
+// CHECK:   store i64 1, ptr %[[gep_step_val]], align 4
+// CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
+// CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
+// CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest_tied..omp_par)
+// CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
+// CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
+// CHECK:   %[[VAL_7:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 0
+// CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
+// CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
+// CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup.1)
+// CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
+// CHECK:   br label %taskloop.exit
+
+// CHECK: taskloop.exit:                                    ; preds = %[[codeRepl]]
+// CHECK:   ret void
+// CHECK: }
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
index 5f31c547e7485..10962a0108e72 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
@@ -70,7 +70,7 @@ llvm.func @_QPtest() {
 // CHECK:         %[[VAL_20:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_18]], i32 0, i32 1
 // CHECK:         %[[VAL_21:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_18]], i32 0, i32 2
 // CHECK:         %[[VAL_22:.*]] = load i64, ptr %[[VAL_21]], align 4
-// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_16]], ptr %[[VAL_17]], i32 1, ptr %[[VAL_19]], ptr %[[VAL_20]], i64 %[[VAL_22]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_16]], ptr %[[VAL_17]], i32 1, ptr %[[VAL_19]], ptr %[[VAL_20]], i64 %[[VAL_22]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:         call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_16]])
 // CHECK:         br label %[[VAL_23:.*]]
 // CHECK:       taskloop.exit:                                    ; preds = %[[VAL_11]]
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 04120166622ee..a8a0bfda2a77d 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -347,45 +347,6 @@ llvm.func @taskloop_collapse(%lb : i32, %ub : i32, %step : i32, %lb1 : i32, %ub1
   llvm.return
 }
 
-// -----
-
-llvm.func @taskloop_final(%lb : i32, %ub : i32, %step : i32, %true : i1) {
-  // expected-error at below {{not yet implemented: Unhandled clause final in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop final(%true) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
-llvm.func @taskloop_grainsize(%lb : i32, %ub : i32, %step : i32, %grainsize : i32) {
-  // expected-error at below {{not yet implemented: Unhandled clause grainsize in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop grainsize(%grainsize: i32) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
-llvm.func @taskloop_if(%lb : i32, %ub : i32, %step : i32, %true : i1) {
-  // expected-error at below {{not yet implemented: Unhandled clause if in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop if(%true) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
 // -----
  omp.declare_reduction @add_reduction_i32 : i32 init {
   ^bb0(%arg0: i32):
@@ -408,58 +369,6 @@ llvm.func @taskloop_inreduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.pt
   llvm.return
 }
 
-// -----
-
-llvm.func @taskloop_mergeable(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error at below {{not yet implemented: Unhandled clause mergeable in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop mergeable {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
-llvm.func @taskloop_nogroup(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error at below {{not yet implemented: Unhandled clause nogroup in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop nogroup {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
-llvm.func @taskloop_num_tasks(%lb : i32, %ub : i32, %step : i32, %numtasks : i32) {
-  // expected-error at below {{not yet implemented: Unhandled clause num_tasks in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop num_tasks(%numtasks: i32) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
-llvm.func @taskloop_priority(%lb : i32, %ub : i32, %step : i32, %priority : i32) {
-  // expected-error at below {{not yet implemented: Unhandled clause priority in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop priority(%priority: i32) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
 // -----
  omp.declare_reduction @add_reduction_i32 : i32 init {
   ^bb0(%arg0: i32):
@@ -484,19 +393,6 @@ llvm.func @taskloop_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr)
 
 // -----
 
-llvm.func @taskloop_untied(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error at below {{not yet implemented: Unhandled clause untied in omp.taskloop operation}}
-  // expected-error at below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop untied {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
-    }
-  }
-  llvm.return
-}
-
-// -----
-
 llvm.func @taskwait_depend(%x: !llvm.ptr) {
   // expected-error at below {{not yet implemented: Unhandled clause depend in omp.taskwait operation}}
   // expected-error at below {{LLVM Translation failed for operation: omp.taskwait}}

>From 5a3b8bda6b43d939cf9e474608439347e0197c8b Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Wed, 7 Jan 2026 09:06:00 +0000
Subject: [PATCH 2/2] Respond to review comments

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp           | 13 ++++++++-----
 .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp    | 12 ++----------
 mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir  |  2 +-
 .../Target/LLVMIR/openmp-taskloop-grainsize.mlir    |  2 +-
 mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir     |  2 +-
 .../Target/LLVMIR/openmp-taskloop-mergeable.mlir    |  2 +-
 .../LLVMIR/openmp-taskloop-no-context-struct.mlir   |  2 +-
 .../test/Target/LLVMIR/openmp-taskloop-nogroup.mlir |  2 +-
 .../Target/LLVMIR/openmp-taskloop-num_tasks.mlir    |  2 +-
 .../Target/LLVMIR/openmp-taskloop-priority.mlir     |  2 +-
 mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir |  2 +-
 mlir/test/Target/LLVMIR/openmp-taskloop.mlir        |  2 +-
 12 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 580daed4f7d10..7b055624233d6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2205,10 +2205,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
 
     Value *ThreadID = getOrCreateThreadID(Ident);
 
-    // Emit runtime call for @__kmpc_taskgroup
-    Function *TaskgroupFn =
-        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
-    Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
+    if (!NoGroup) {
+      // Emit runtime call for @__kmpc_taskgroup
+      Function *TaskgroupFn =
+          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
+      Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
+    }
 
     // `flags` Argument Configuration
     // Task is tied if (Flags & 1) == 1.
@@ -2272,7 +2274,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
     Value *IfCondVal =
         IfCond ? Builder.CreateIntCast(IfCond, Builder.getInt32Ty(), true)
                : Builder.getInt32(1);
-    Value *NoGroupVal = Builder.getInt32(NoGroup ? 1 : 0);
+    // The OMPIRBuilder emits the __kmpc_taskgroup call itself, so always pass
+    // 1 here to prevent __kmpc_taskloop from creating another taskgroup.
+    Value *NoGroupVal = Builder.getInt32(1);
     Value *SchedVal = Builder.getInt32(Sched);
     Value *GrainSizeVal =
         GrainSize ? Builder.CreateIntCast(GrainSize, Builder.getInt64Ty(), true)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index eef71a17fb41d..1f56a8ba8b263 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -357,10 +357,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDevice())
       result = todo("device");
   };
-  auto checkFinal = [&todo](auto op, LogicalResult &result) {
-    if (op.getFinal())
-      result = todo("final");
-  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -382,10 +378,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getParLevelSimd())
       result = todo("parallelization-level");
   };
-  auto checkPriority = [&todo](auto op, LogicalResult &result) {
-    if (op.getPriority())
-      result = todo("priority");
-  };
   auto checkPrivate = [&todo](auto op, LogicalResult &result) {
     if (!op.getPrivateVars().empty() || op.getPrivateSyms())
       result = todo("privatization");
@@ -2738,8 +2730,8 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::Value *ifCond = nullptr;
   llvm::Value *grainsize = nullptr;
   int sched = 0; // default
-  Value grainsizeVal = taskloopOp.getGrainsize();
-  Value numTasksVal = taskloopOp.getNumTasks();
+  mlir::Value grainsizeVal = taskloopOp.getGrainsize();
+  mlir::Value numTasksVal = taskloopOp.getNumTasks();
   if (Value ifVar = taskloopOp.getIfExpr())
     ifCond = moduleTranslation.lookupValue(ifVar);
   if (grainsizeVal) {
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
index c1202d1ffab7b..24ccb6b18b392 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
@@ -77,7 +77,7 @@ llvm.func @_QPtest() {
 // CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
 // CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
 // CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
-// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
 // CHECK:   br label %taskloop.exit
 
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
index a274e82adba76..af38d170a198c 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
@@ -77,7 +77,7 @@ llvm.func @_QPtest() {
 // CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
 // CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
 // CHECK:   %[[VAL_10:.*]] = load i64, ptr %9, align 4
-// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %5, i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 1, i64 2, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %5, i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 1, i32 1, i64 2, ptr @omp_taskloop_dup)
 // CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
 // CHECK:   br label %taskloop.exit
 
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
index d7e16102ae5af..954ab4df02b0e 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
@@ -82,7 +82,7 @@ llvm.func @_QPtest() {
 // CHECK:   %[[VAL_11:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_8]], i32 0, i32 2
 // CHECK:   %[[VAL_12:.*]] = load i64, ptr %[[VAL_11]], align 4
 // CHECK:   %[[IF_VAL:.*]] = sext i1 %4 to i32
-// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_7]], i32 %[[IF_VAL]], ptr %[[VAL_9]], ptr %[[VAL_10]], i64 %[[VAL_12]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_7]], i32 %[[IF_VAL]], ptr %[[VAL_9]], ptr %[[VAL_10]], i64 %[[VAL_12]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
 // CHECK:   br label %taskloop.exit
 
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
index 54bd15b7d8ffa..5439f926e22cf 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
@@ -76,7 +76,7 @@ llvm.func @_QPtest() {
 // CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
 // CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
 // CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
-// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
 // CHECK:   br label %taskloop.exit
 
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index 3355a14e38ffd..43b50e7a3206c 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -54,7 +54,7 @@ llvm.func @_QPtest() {
 // CHECK:         %[[VAL_18:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 1
 // CHECK:         %[[VAL_19:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_16]], i32 0, i32 2
 // CHECK:         %[[VAL_20:.*]] = load i64, ptr %[[VAL_19]], align 4
-// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:         call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_14]])
 // CHECK:         br label %[[VAL_21:.*]]
 // CHECK:       taskloop.exit:                                    ; preds = %[[VAL_9]]
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
index 7310b17cc04c4..f45731acb160b 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
@@ -68,7 +68,7 @@ llvm.func @_QPtest() {
 // CHECK:   %[[gep_omp_task_context_ptr:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[structArg]], i32 0, i32 3
 // CHECK:   store ptr %[[ctx_ptr]], ptr %[[gep_omp_task_context_ptr]], align 8
 // CHECK:   %[[omp_global_thread_num:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
-// CHECK:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
+// CHECK-NOT:   call void @__kmpc_taskgroup(ptr @1, i32 %[[omp_global_thread_num]])
 // CHECK:   %[[VAL_5:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[omp_global_thread_num]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
 // CHECK:   %[[VAL_6:.*]] = load ptr, ptr %[[VAL_5]], align 8
 // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_6]], ptr align 1 %[[structArg]], i64 32, i1 false)
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
index 7f0f315ea7c2f..df789027eed13 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
@@ -77,7 +77,7 @@ llvm.func @_QPtest() {
 // CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
 // CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
 // CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
-// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 2, i64 2, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 1, i32 2, i64 2, ptr @omp_taskloop_dup)
 // CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
 // CHECK:   br label %taskloop.exit
 
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
index b2735f410cf8f..a8cf4c9dd41b1 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
@@ -77,7 +77,7 @@ llvm.func @_QPtest() {
 // CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
 // CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
 // CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
-// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
 // CHECK:   br label %taskloop.exit
 
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
index 121363f76f900..8b5adf565f965 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
@@ -152,7 +152,7 @@ llvm.func @_QPtest_tied() {
 // CHECK:   %[[VAL_8:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 1
 // CHECK:   %[[VAL_9:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_6]], i32 0, i32 2
 // CHECK:   %[[VAL_10:.*]] = load i64, ptr %[[VAL_9]], align 4
-// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup.1)
+// CHECK:   call void @__kmpc_taskloop(ptr @1, i32 %omp_global_thread_num, ptr %[[VAL_5]], i32 1, ptr %[[VAL_7]], ptr %[[VAL_8]], i64 %[[VAL_10]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup.1)
 // CHECK:   call void @__kmpc_end_taskgroup(ptr @1, i32 %omp_global_thread_num)
 // CHECK:   br label %taskloop.exit
 
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
index 10962a0108e72..5f31c547e7485 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
@@ -70,7 +70,7 @@ llvm.func @_QPtest() {
 // CHECK:         %[[VAL_20:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_18]], i32 0, i32 1
 // CHECK:         %[[VAL_21:.*]] = getelementptr { i64, i64, i64, ptr }, ptr %[[VAL_18]], i32 0, i32 2
 // CHECK:         %[[VAL_22:.*]] = load i64, ptr %[[VAL_21]], align 4
-// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_16]], ptr %[[VAL_17]], i32 1, ptr %[[VAL_19]], ptr %[[VAL_20]], i64 %[[VAL_22]], i32 0, i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_16]], ptr %[[VAL_17]], i32 1, ptr %[[VAL_19]], ptr %[[VAL_20]], i64 %[[VAL_22]], i32 1, i32 0, i64 0, ptr @omp_taskloop_dup)
 // CHECK:         call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_16]])
 // CHECK:         br label %[[VAL_23:.*]]
 // CHECK:       taskloop.exit:                                    ; preds = %[[VAL_11]]



More information about the llvm-branch-commits mailing list