[llvm-branch-commits] [flang] [mlir] [mlir][OpenMP] Move taskloop clauses to the context op (PR #188070)

Tom Eccles via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Mar 25 03:38:11 PDT 2026


https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/188070

>From 197b709fe32f2fe516d2ddf8c1c6c9df4c28ccea Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Fri, 20 Mar 2026 17:39:52 +0000
Subject: [PATCH 1/2] [mlir][OpenMP] Move taskloop clauses to the context op

The clauses are implemented when lowering the context op, which
generates the runtime calls and handles the outlining of the task
function (including privatization, etc.). It therefore makes more
sense to put the clauses on this operation rather than on the wrapped
loop.

RFC: https://discourse.llvm.org/t/rfc-openmp-alloca-placement-for-openmp-loop-wrappers/89512/7

Patch 2/3
---
 flang/lib/Lower/OpenMP/OpenMP.cpp             |  16 ++-
 .../lib/Optimizer/Analysis/AliasAnalysis.cpp  |   8 +-
 .../OpenMP/taskloop-alloca-placement.f90      |   4 +-
 flang/test/Lower/OpenMP/if-clause.f90         |   6 +-
 flang/test/Lower/OpenMP/implicit-dsa.f90      |  20 +--
 flang/test/Lower/OpenMP/masked_taskloop.f90   |   8 +-
 .../Lower/OpenMP/parallel-masked-taskloop.f90 |   4 +-
 flang/test/Lower/OpenMP/taskloop-cancel.f90   |   4 +-
 flang/test/Lower/OpenMP/taskloop-collapse.f90 |   6 +-
 .../test/Lower/OpenMP/taskloop-grainsize.f90  |   8 +-
 .../Lower/OpenMP/taskloop-inreduction.f90     |   6 +-
 flang/test/Lower/OpenMP/taskloop-numtasks.f90 |   8 +-
 .../test/Lower/OpenMP/taskloop-reduction.f90  |   6 +-
 flang/test/Lower/OpenMP/taskloop.f90          |  40 +++---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 102 +++++++-------
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  80 ++++++-----
 .../OpenMP/Transforms/MarkDeclareTarget.cpp   |   2 +-
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  35 +++--
 .../OpenMPToLLVM/convert-to-llvmir.mlir       |   8 +-
 mlir/test/Dialect/OpenMP/invalid.mlir         |  92 ++++++-------
 mlir/test/Dialect/OpenMP/ops.mlir             | 128 +++++++++---------
 .../Target/LLVMIR/openmp-taskloop-cancel.mlir |  12 +-
 .../openmp-taskloop-cancellation-point.mlir   |   8 +-
 .../LLVMIR/openmp-taskloop-collapse.mlir      |  24 ++--
 .../openmp-taskloop-context-alloca.mlir       |   4 +-
 .../Target/LLVMIR/openmp-taskloop-final.mlir  |   4 +-
 .../LLVMIR/openmp-taskloop-grainsize.mlir     |   4 +-
 .../Target/LLVMIR/openmp-taskloop-if.mlir     |   4 +-
 .../LLVMIR/openmp-taskloop-mergeable.mlir     |   4 +-
 .../openmp-taskloop-no-context-struct.mlir    |   4 +-
 .../LLVMIR/openmp-taskloop-nogroup.mlir       |   4 +-
 .../LLVMIR/openmp-taskloop-num_tasks.mlir     |   4 +-
 .../LLVMIR/openmp-taskloop-priority.mlir      |   4 +-
 .../Target/LLVMIR/openmp-taskloop-untied.mlir |   8 +-
 mlir/test/Target/LLVMIR/openmp-taskloop.mlir  |   4 +-
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  21 ++-
 36 files changed, 358 insertions(+), 346 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 414f3b5fd3f02..ba3a967bf6986 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1826,7 +1826,7 @@ static void genTaskgroupClauses(
 static void genTaskloopClauses(
     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
     lower::StatementContext &stmtCtx, const List<Clause> &clauses,
-    mlir::Location loc, mlir::omp::TaskloopOperands &clauseOps,
+    mlir::Location loc, mlir::omp::TaskloopContextOperands &clauseOps,
     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms,
     llvm::SmallVectorImpl<const semantics::Symbol *> &inReductionSyms) {
 
@@ -3250,7 +3250,8 @@ static mlir::omp::TaskloopContextOp genStandaloneTaskloop(
     lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
     lower::pft::Evaluation &eval, mlir::Location loc,
     const ConstructQueue &queue, ConstructQueue::const_iterator item) {
-  mlir::omp::TaskloopOperands taskloopClauseOps;
+  mlir::omp::TaskloopContextOperands taskloopClauseOps;
+  mlir::omp::TaskloopOperands wrapperClauseOps;
   llvm::SmallVector<const semantics::Symbol *> reductionSyms;
   llvm::SmallVector<const semantics::Symbol *> inReductionSyms;
 
@@ -3275,17 +3276,18 @@ static mlir::omp::TaskloopContextOp genStandaloneTaskloop(
   taskloopArgs.inReduction.vars = taskloopClauseOps.inReductionVars;
 
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-  auto taskLoopContextOp =
-      mlir::omp::TaskloopContextOp::create(firOpBuilder, loc);
+  auto taskLoopContextOp = mlir::omp::TaskloopContextOp::create(
+      firOpBuilder, loc, taskloopClauseOps);
+  // Create entry block with arguments.
+  genEntryBlock(firOpBuilder, taskloopArgs, taskLoopContextOp.getRegion());
 
   mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
-  firOpBuilder.createBlock(&taskLoopContextOp.getRegion());
   firOpBuilder.setInsertionPointToStart(&taskLoopContextOp.getRegion().front());
   auto taskLoopOp = genWrapperOp<mlir::omp::TaskloopOp>(
-      converter, loc, taskloopClauseOps, taskloopArgs);
+      converter, loc, wrapperClauseOps, taskloopArgs);
 
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
-                loopNestClauseOps, iv, {{taskLoopOp, taskloopArgs}},
+                loopNestClauseOps, iv, {{taskLoopContextOp, taskloopArgs}},
                 llvm::omp::Directive::OMPD_taskloop, dsp);
 
   firOpBuilder.setInsertionPointAfter(taskLoopOp);
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index 550e8a3a281d6..c1551e36cff9b 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -834,10 +834,10 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
                 })
                 .template Case<omp::DistributeOp, omp::ParallelOp,
                                omp::SectionsOp, omp::SimdOp, omp::SingleOp,
-                               omp::TaskloopOp, omp::TaskOp, omp::WsloopOp>(
-                    [&](auto privateOp) {
-                      isPrivateItem = isPrivateArg(argIface, privateOp, op);
-                    });
+                               omp::TaskloopContextOp, omp::TaskOp,
+                               omp::WsloopOp>([&](auto privateOp) {
+                  isPrivateItem = isPrivateArg(argIface, privateOp, op);
+                });
             if (ompValArg) {
               v = ompValArg;
               defOp = ompValArg.getDefiningOp();
diff --git a/flang/test/Integration/OpenMP/taskloop-alloca-placement.f90 b/flang/test/Integration/OpenMP/taskloop-alloca-placement.f90
index d6b272cc4dd57..f8ccc3f02edf1 100644
--- a/flang/test/Integration/OpenMP/taskloop-alloca-placement.f90
+++ b/flang/test/Integration/OpenMP/taskloop-alloca-placement.f90
@@ -18,9 +18,9 @@ subroutine test_taskloop(lb,ub,step,a)
   integer, allocatable :: a(:)
 
   !$omp taskloop shared(a)
-! CHECK:         omp.taskloop.context {
+! CHECK:         omp.taskloop.context private({{.*}}) {
 ! CHECK:           llvm.alloca
-! CHECK:           omp.taskloop private({{.*}}) {
+! CHECK:           omp.taskloop {
   do i = lb,ub,step
 ! CHECK-NOT:         llvm.alloca
     a(i) = i
diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90
index 869d98813b6bf..46ce7b31d51b9 100644
--- a/flang/test/Lower/OpenMP/if-clause.f90
+++ b/flang/test/Lower/OpenMP/if-clause.f90
@@ -1585,24 +1585,24 @@ program main
   ! ----------------------------------------------------------------------------
 
   ! CHECK:      omp.taskloop.context
-  ! CHECK:      omp.taskloop
   ! CHECK-NOT: if({{.*}})
+  ! CHECK:      omp.taskloop
   !$omp taskloop
   do i = 1, 10
   end do
   !$omp end taskloop
 
   ! CHECK:      omp.taskloop.context
-  ! CHECK:      omp.taskloop
   ! CHECK-SAME: if({{.*}})
+  ! CHECK:      omp.taskloop
   !$omp taskloop if(.true.)
   do i = 1, 10
   end do
   !$omp end taskloop
 
   ! CHECK:      omp.taskloop.context
-  ! CHECK:      omp.taskloop
   ! CHECK-SAME: if({{.*}})
+  ! CHECK:      omp.taskloop
   !$omp taskloop if(taskloop: .true.)
   do i = 1, 10
   end do
diff --git a/flang/test/Lower/OpenMP/implicit-dsa.f90 b/flang/test/Lower/OpenMP/implicit-dsa.f90
index 3f1d80bb6303c..1a3fb33c8e340 100644
--- a/flang/test/Lower/OpenMP/implicit-dsa.f90
+++ b/flang/test/Lower/OpenMP/implicit-dsa.f90
@@ -352,9 +352,9 @@ subroutine implicit_dsa_test7
 ! CHECK:           %[[DECL_Z:.*]]:2 = hlfir.declare %[[ALLOCA_Z]] {uniq_name = "_QFimplicit_dsa_taskloop_test1Ez"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 subroutine implicit_dsa_taskloop_test1
    integer :: x, y, z
-   ! CHECK: omp.taskloop.context {
-   ! CHECK: omp.taskloop private(
+   ! CHECK: omp.taskloop.context private(
    ! CHECK-SAME: @[[TASKLOOP_TEST1_Y_PRIVATE]] %[[DECL_Y]]#0 -> %[[ARG0:.*]], @[[TASKLOOP_TEST1_X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG1:.*]], @[[TASKLOOP_TEST1_I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG2:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK: omp.taskloop {
    ! CHECK: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
    !$omp taskloop private(y) shared(z)
    do i = 1, 100
@@ -366,8 +366,8 @@ subroutine implicit_dsa_taskloop_test1
    end do
    !$omp end taskloop
 
-   ! CHECK: omp.taskloop.context {
-   ! CHECK: omp.taskloop private(@[[TASKLOOP_TEST1_I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+   ! CHECK: omp.taskloop.context private(@[[TASKLOOP_TEST1_I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+   ! CHECK: omp.taskloop {
    !$omp taskloop default(shared)
    do i = 1, 100
       ! CHECK:  %[[LOAD_Y:.*]] = fir.load %[[DECL_Y]]#0 : !fir.ref<i32>
@@ -389,8 +389,8 @@ subroutine implicit_dsa_taskloop_test2
    integer :: x
    ! CHECK:   omp.parallel {
    !$omp parallel 
-   ! CHECK:   omp.taskloop.context
-   ! CHECK:   omp.taskloop private(@[[TASKLOOP_TEST2_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+   ! CHECK:   omp.taskloop.context private(@[[TASKLOOP_TEST2_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+   ! CHECK:   omp.taskloop {
    !$omp taskloop
    do i = 1, 100
       ! CHECK: hlfir.assign %{{.*}} to %[[X_DECL]]#0 : i32, !fir.ref<i32>
@@ -398,8 +398,8 @@ subroutine implicit_dsa_taskloop_test2
    end do
    !$omp end taskloop
 
-   ! CHECK: omp.taskloop.context
-   ! CHECK: omp.taskloop private(@[[TASKLOOP_TEST2_X_PRIVATE]] %[[X_DECL]]#0 -> %[[ARG0]], @[[TASKLOOP_TEST2_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK: omp.taskloop.context private(@[[TASKLOOP_TEST2_X_PRIVATE]] %[[X_DECL]]#0 -> %[[ARG0]], @[[TASKLOOP_TEST2_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK: omp.taskloop {
    !$omp taskloop private(x)
    do i = 1, 10
       ! CHECK: %[[DECL_PRIV_X:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFimplicit_dsa_taskloop_test2Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -428,8 +428,8 @@ subroutine implicit_dsa_taskloop_test3
    ! CHECK:  omp.parallel private(@[[TASKLOOP_TEST3_X_FIRSTPRIVATE]] %[[X_DECL]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
    ! CHECK:  %[[X_PRIV_VAL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFimplicit_dsa_taskloop_test3Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
    !$omp parallel firstprivate(x)
-   ! CHECK:  omp.taskloop.context
-   ! CHECK:  omp.taskloop private(@[[TASKLOOP_TEST3_X_FIRSTPRIVATE]] %[[X_PRIV_VAL]]#0 -> %[[ARG1:.*]], @[[TASKLOOP_TEST3_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG2:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:  omp.taskloop.context private(@[[TASKLOOP_TEST3_X_FIRSTPRIVATE]] %[[X_PRIV_VAL]]#0 -> %[[ARG1:.*]], @[[TASKLOOP_TEST3_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG2:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:  omp.taskloop {
    !$omp taskloop
    ! CHECK:  %[[X_VAL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFimplicit_dsa_taskloop_test3Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
    do i = 1, 100
diff --git a/flang/test/Lower/OpenMP/masked_taskloop.f90 b/flang/test/Lower/OpenMP/masked_taskloop.f90
index 5f1a824026069..1285d5c4cb758 100644
--- a/flang/test/Lower/OpenMP/masked_taskloop.f90
+++ b/flang/test/Lower/OpenMP/masked_taskloop.f90
@@ -22,13 +22,13 @@
 ! CHECK:            %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:            %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:            %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:            omp.taskloop.context {
-! CHECK:              omp.taskloop private(
+! CHECK:            omp.taskloop.context private(
 ! CHECK-SAME:            @[[J_FIRSTPRIVATE]] %[[DECL_J]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
-! CHECK:                omp.loop_nest (%arg2) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
+! CHECK:              omp.taskloop {
+! CHECK:                omp.loop_nest (%[[IV:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
 ! CHECK:                  %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFtest_masked_taskloopEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                  %[[VAL2:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFtest_masked_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:                  hlfir.assign %arg2 to %[[VAL2]]#0 : i32, !fir.ref<i32>
+! CHECK:                  hlfir.assign %[[IV]] to %[[VAL2]]#0 : i32, !fir.ref<i32>
 ! CHECK:                  %[[LOAD_J:.*]] = fir.load %[[VAL1]]#0 : !fir.ref<i32>
 ! CHECK:                  %[[C1_I32_1:.*]] = arith.constant 1 : i32
 ! CHECK:                  %[[RES_J:.*]] = arith.addi %[[LOAD_J]], %[[C1_I32_1]] : i32
diff --git a/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90 b/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90
index eeac5719f895e..dc94513e2f83b 100644
--- a/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90
+++ b/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90
@@ -16,8 +16,8 @@
 ! CHECK:              %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:              %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:              %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:              omp.taskloop.context {
-! CHECK:                omp.taskloop private(@[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+! CHECK:              omp.taskloop.context private(@[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+! CHECK:                omp.taskloop {
 ! CHECK:                  omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%c1_i32_0) {
 ! CHECK:                    %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFtest_parallel_master_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                    hlfir.assign %[[ARG1]] to %[[VAL1]]#0 : i32, !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/taskloop-cancel.f90 b/flang/test/Lower/OpenMP/taskloop-cancel.f90
index 036d0071d9e80..4109d498862b7 100644
--- a/flang/test/Lower/OpenMP/taskloop-cancel.f90
+++ b/flang/test/Lower/OpenMP/taskloop-cancel.f90
@@ -12,8 +12,8 @@
 ! CHECK:           %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:           %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:           omp.taskloop.context {
-! CHECK:             omp.taskloop private(@[[I_PRIVATE]] %2#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+! CHECK:           omp.taskloop.context private(@[[I_PRIVATE]] %2#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+! CHECK:             omp.taskloop {
 ! CHECK:               omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
 ! CHECK:                 %[[IDX:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 hlfir.assign %[[ARG1]] to %[[IDX]]#0 : i32, !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/taskloop-collapse.f90 b/flang/test/Lower/OpenMP/taskloop-collapse.f90
index 288d06c7e5a37..a73f351b6d19d 100644
--- a/flang/test/Lower/OpenMP/taskloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/taskloop-collapse.f90
@@ -22,10 +22,10 @@ subroutine test()
 
     !$omp taskloop collapse(2)
     ! CHECK:      omp.taskloop.context
-    ! CHECK:      omp.taskloop
-    ! CHECK-SAME: private(@_QFtestEsum_firstprivate_i32 %[[DECLARE_SUM]]#0 -> %arg0, @_QFtestEi_private_i32 %[[DECLARE_I]]#0 -> %arg1, @_QFtestEj_private_i32 %[[DECLARE_J]]#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>)
+    ! CHECK-SAME:   private(@_QFtestEsum_firstprivate_i32 %[[DECLARE_SUM]]#0 -> %arg0, @_QFtestEi_private_i32 %[[DECLARE_I]]#0 -> %arg1, @_QFtestEj_private_i32 %[[DECLARE_J]]#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>)
+    ! CHECK:      omp.taskloop {
     ! CHECK-LABEL: omp.loop_nest
-    ! CHECK-SAME: (%arg3, %arg4) : i32 = (%c1_i32, %c1_i32_1) to (%c10_i32, %c5_i32) inclusive step (%c1_i32_0, %c1_i32_2) collapse(2)
+    ! CHECK-SAME:   (%[[IV1:.*]], %[[IV2:.*]]) : i32 = (%c1_i32, %c1_i32_1) to (%c10_i32, %c5_i32) inclusive step (%c1_i32_0, %c1_i32_2) collapse(2)
     do i = 1, 10
         do j = 1, 5
             sum = sum + i + j
diff --git a/flang/test/Lower/OpenMP/taskloop-grainsize.f90 b/flang/test/Lower/OpenMP/taskloop-grainsize.f90
index eaad0b08f1a8e..6313cf02d084d 100644
--- a/flang/test/Lower/OpenMP/taskloop-grainsize.f90
+++ b/flang/test/Lower/OpenMP/taskloop-grainsize.f90
@@ -27,9 +27,9 @@
 ! CHECK:          %[[GRAINSIZE:.*]] = arith.constant 10 : i32
 subroutine test_grainsize
    integer :: i, x
-   ! CHECK:        omp.taskloop.context {
-   ! CHECK:          omp.taskloop grainsize(%[[GRAINSIZE]]: i32)
+   ! CHECK:        omp.taskloop.context grainsize(%[[GRAINSIZE]]: i32)
    ! CHECK-SAME:        private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:          omp.taskloop {
    ! CHECK:            omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
    !$omp taskloop grainsize(10)
    do i = 1, 1000
@@ -42,8 +42,8 @@ end subroutine test_grainsize
 subroutine test_grainsize_strict
   integer :: i, x
   ! CHECK: %[[GRAINSIZE:.*]] = arith.constant 10 : i32
-  ! CHECK: omp.taskloop.context {
-  ! CHECK:   omp.taskloop grainsize(strict, %[[GRAINSIZE]]: i32)
+  ! CHECK: omp.taskloop.context grainsize(strict, %[[GRAINSIZE]]: i32)
+  ! CHECK:   omp.taskloop {
   !$omp taskloop grainsize(strict:10)
   do i = 1, 1000
      !CHECK: arith.addi
diff --git a/flang/test/Lower/OpenMP/taskloop-inreduction.f90 b/flang/test/Lower/OpenMP/taskloop-inreduction.f90
index a442c497e60be..ffe62c4d4f3d2 100644
--- a/flang/test/Lower/OpenMP/taskloop-inreduction.f90
+++ b/flang/test/Lower/OpenMP/taskloop-inreduction.f90
@@ -25,9 +25,9 @@
 subroutine omp_taskloop_inreduction()
    integer x
    x = 0
-   ! CHECK:        omp.taskloop.context {
-   ! CHECK:        omp.taskloop in_reduction(@[[ADD_RED_I32]] 
-   ! CHECK:        %[[DECL_X]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) private(@[[PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
+   ! CHECK:        omp.taskloop.context in_reduction(@[[ADD_RED_I32]] 
+   ! CHECK-SAME:     %[[DECL_X]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) private(@[[PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
+   ! CHECK:        omp.taskloop {
    ! CHECK:        %[[VAL_ARG1:.*]]:2 = hlfir.declare %[[ARG0]] 
    ! CHECK-SAME:   {uniq_name = "_QFomp_taskloop_inreductionEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
    !$omp taskloop in_reduction(+:x)
diff --git a/flang/test/Lower/OpenMP/taskloop-numtasks.f90 b/flang/test/Lower/OpenMP/taskloop-numtasks.f90
index 668bde7835c72..6677099ee6963 100644
--- a/flang/test/Lower/OpenMP/taskloop-numtasks.f90
+++ b/flang/test/Lower/OpenMP/taskloop-numtasks.f90
@@ -27,9 +27,9 @@
 ! CHECK:          %[[VAL_NUMTASKS:.*]] = arith.constant 10 : i32
 subroutine test_num_tasks
    integer :: i, x
-   ! CHECK:        omp.taskloop.context {
-   ! CHECK:          omp.taskloop num_tasks(%[[VAL_NUMTASKS]]: i32)
+   ! CHECK:        omp.taskloop.context num_tasks(%[[VAL_NUMTASKS]]: i32)
    ! CHECK-SAME:        private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:          omp.taskloop {
    ! CHECK:            omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
    !$omp taskloop num_tasks(10)
    do i = 1, 1000
@@ -42,8 +42,8 @@ end subroutine test_num_tasks
 subroutine test_num_tasks_strict
   integer :: x, i
   ! CHECK:  %[[NUM_TASKS:.*]] = arith.constant 10 : i32
-  ! CHECK: omp.taskloop.context {
-  ! CHECK:   omp.taskloop num_tasks(strict, %[[NUM_TASKS]]: i32)
+  ! CHECK: omp.taskloop.context num_tasks(strict, %[[NUM_TASKS]]: i32)
+  ! CHECK:   omp.taskloop {
   !$omp taskloop num_tasks(strict:10)
   do i = 1, 100
      !CHECK: arith.addi
diff --git a/flang/test/Lower/OpenMP/taskloop-reduction.f90 b/flang/test/Lower/OpenMP/taskloop-reduction.f90
index 6c7e74f051899..8b7b97fe13cc6 100644
--- a/flang/test/Lower/OpenMP/taskloop-reduction.f90
+++ b/flang/test/Lower/OpenMP/taskloop-reduction.f90
@@ -25,9 +25,9 @@
 subroutine omp_taskloop_reduction()
    integer x
    x = 0
-   ! CHECK:       omp.taskloop.context {
-   ! CHECK:       omp.taskloop private(@[[PRIVATE_I]] 
-   ! CHECK-SAME:  %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) reduction(@[[ADD_RED_I32]] %[[DECL_X]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
+   ! CHECK:       omp.taskloop.context private(@[[PRIVATE_I]] 
+   ! CHECK-SAME:    %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) reduction(@[[ADD_RED_I32]] %[[DECL_X]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
+   ! CHECK:       omp.taskloop {
    ! CHECK:       %[[VAL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] 
    !$omp taskloop reduction(+:x)
    do i = 1, 100
diff --git a/flang/test/Lower/OpenMP/taskloop.f90 b/flang/test/Lower/OpenMP/taskloop.f90
index e379646703e99..8a4c5a8e2a0a4 100644
--- a/flang/test/Lower/OpenMP/taskloop.f90
+++ b/flang/test/Lower/OpenMP/taskloop.f90
@@ -54,8 +54,8 @@
 ! CHECK:          %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:          %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:          %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:          omp.taskloop.context {
-! CHECK:            omp.taskloop private(@[[RES_FIRSTPRIVATE]] %[[RES_VAL]]#0 -> %[[PRIV_RES:.*]], @[[I_PRIVATE]] %[[I_VAL]]#0 -> %[[PRIV_I:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:          omp.taskloop.context private(@[[RES_FIRSTPRIVATE]] %[[RES_VAL]]#0 -> %[[PRIV_RES:.*]], @[[I_PRIVATE]] %[[I_VAL]]#0 -> %[[PRIV_I:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:            omp.taskloop {
 ! CHECK:              omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
 ! CHECK:                %[[RES_DECL:.*]]:2 = hlfir.declare %[[PRIV_RES]] {uniq_name = "_QFomp_taskloopEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                %[[I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFomp_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -89,8 +89,8 @@ end subroutine omp_taskloop
 ! CHECK:           %[[DECL_RES:.*]]:2 = hlfir.declare %[[ALLOCA_RES]] {uniq_name = "_QFomp_taskloop_privateEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 subroutine omp_taskloop_private
   integer :: res, i
-! CHECK:           omp.taskloop.context {
-! CHECK:             omp.taskloop private(@[[RES_PRIVATE_TEST2]] %[[DECL_RES]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE_TEST2]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:           omp.taskloop.context private(@[[RES_PRIVATE_TEST2]] %[[DECL_RES]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE_TEST2]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:             omp.taskloop {
 ! CHECK:               omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
 ! CHECK:                 %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloop_privateEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   !$omp taskloop private(res)
@@ -118,9 +118,9 @@ end subroutine omp_taskloop_private
 subroutine taskloop_allocate()
    use omp_lib
    integer :: x
-   ! CHECK:         omp.taskloop.context {
-   ! CHECK:           omp.taskloop allocate(%{{.*}} : i64 -> %[[DECL_X]]#0 : !fir.ref<i32>) 
+   ! CHECK:         omp.taskloop.context allocate(%{{.*}} : i64 -> %[[DECL_X]]#0 : !fir.ref<i32>) 
    ! CHECK-SAME:      private(@[[X_PRIVATE_TEST_ALLOCATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE_TEST_ALLOCATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:           omp.taskloop {
    !$omp taskloop allocate(omp_high_bw_mem_alloc: x) private(x)
    do i = 1, 100
       ! CHECK: arith.addi
@@ -138,8 +138,8 @@ end subroutine taskloop_allocate
 ! CHECK:           %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtaskloop_finalEi"}
 ! CHECK:           %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFtaskloop_finalEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 subroutine taskloop_final()
-    ! CHECK:  omp.taskloop.context {
-    ! CHECK:    omp.taskloop final(%true) private(@[[I_PRIVATE_FINAL]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+    ! CHECK:  omp.taskloop.context final(%true) private(@[[I_PRIVATE_FINAL]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+    ! CHECK:    omp.taskloop {
    !$omp taskloop final(.true.)
    do i = 1, 100
       ! CHECK: fir.call @_QPfoo()
@@ -160,8 +160,8 @@ subroutine taskloop_final()
 ! CHECK:           %[[VAL_BAR:.*]] = fir.convert %[[LOAD_VAL]] : (!fir.logical<4>) -> i1
 subroutine omp_taskloop_if(bar)
    logical, intent(inout) :: bar
-   !CHECK: omp.taskloop.context {
-   !CHECK:   omp.taskloop if(%[[VAL_BAR]]) private(@[[I_PRIVATE_IF_TEST1]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
+   !CHECK: omp.taskloop.context if(%[[VAL_BAR]]) private(@[[I_PRIVATE_IF_TEST1]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
+   !CHECK:   omp.taskloop {
    !$omp taskloop if(bar)
    do i = 1, 10
       call foo()
@@ -175,8 +175,8 @@ end subroutine omp_taskloop_if
 
 ! CHECK-LABEL:  func.func @_QPtest_mergeable
 subroutine test_mergeable
-  ! CHECK: omp.taskloop.context {
-  ! CHECK:   omp.taskloop mergeable
+  ! CHECK: omp.taskloop.context mergeable
+  ! CHECK:   omp.taskloop {
   !$omp taskloop mergeable
   do i = 1, 10
   end do
@@ -192,8 +192,8 @@ end subroutine test_mergeable
 ! CHECK:          %[[LOAD_VAL:.*]] = fir.load %[[VAL1]]#0 : !fir.ref<i32>
 subroutine test_priority(n)
    integer, intent(inout) :: n
-   ! CHECK:  omp.taskloop.context {
-   ! CHECK:    omp.taskloop priority(%[[LOAD_VAL]] : i32)
+   ! CHECK:  omp.taskloop.context priority(%[[LOAD_VAL]] : i32)
+   ! CHECK:    omp.taskloop {
    !$omp taskloop priority(n)
    do i = 1, 10
    end do
@@ -206,8 +206,8 @@ end subroutine test_priority
 
 ! CHECK-LABEL:  func.func @_QPomp_taskloop_untied
 subroutine omp_taskloop_untied()
-  ! CHECK: omp.taskloop.context {
-  ! CHECK:   omp.taskloop untied
+  ! CHECK: omp.taskloop.context untied
+  ! CHECK:   omp.taskloop {
   !$omp taskloop untied
   do i = 1, 10
     call foo()
@@ -220,8 +220,8 @@ subroutine omp_taskloop_untied()
 !===============================================================================
 
 subroutine omp_taskloop_nogroup()
-  ! CHECK: omp.taskloop.context {
-  ! CHECK:   omp.taskloop nogroup
+  ! CHECK: omp.taskloop.context nogroup
+  ! CHECK:   omp.taskloop {
   !$omp taskloop nogroup
   do i = 1, 10
     call foo()
@@ -241,8 +241,8 @@ subroutine omp_taskloop_nogroup()
 subroutine omp_taskloop_lastprivate()
    integer x
    x = 0
-   ! CHECK:  omp.taskloop.context {
-   ! CHECK:    omp.taskloop private(@[[LAST_PRIVATE_X]] %[[DECL_X]]#0 -> %[[ARG0]], @[[LAST_PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:  omp.taskloop.context private(@[[LAST_PRIVATE_X]] %[[DECL_X]]#0 -> %[[ARG0]], @[[LAST_PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:    omp.taskloop {
    !$omp taskloop lastprivate(x)
    do i = 1, 100
       ! CHECK: %[[DECL_ARG0:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloop_lastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 64ba76a1eddbe..a4f09f883e2f1 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -975,9 +975,15 @@ def TaskOp
 // SingleBlock restriction is just to ease implementation. It would be okay to
 // relax this.
 def TaskloopContextOp : OpenMP_Op<"taskloop.context", traits = [
-    AutomaticAllocationScope, RecursiveMemoryEffects, SingleBlock,
+    AttrSizedOperandSegments, AutomaticAllocationScope,
+    RecursiveMemoryEffects, SingleBlock,
     DeclareOpInterfaceMethods<OutlineableOpenMPOpInterface>
-  ], clauses = [ /* TODO */], singleRegion = true> {
+  ], clauses = [
+    OpenMP_AllocateClause, OpenMP_FinalClause, OpenMP_GrainsizeClause,
+    OpenMP_IfClause, OpenMP_InReductionClause, OpenMP_MergeableClause,
+    OpenMP_NogroupClause, OpenMP_NumTasksClause, OpenMP_PriorityClause,
+    OpenMP_PrivateClause, OpenMP_ReductionClause, OpenMP_UntiedClause
+  ], singleRegion = true> {
   let summary = "OutlinableOpenMPOpInterface wrapper for taskloop construct";
   let description = [{
     The taskloop construct specifies that the iterations of one or more
@@ -997,7 +1003,7 @@ def TaskloopContextOp : OpenMP_Op<"taskloop.context", traits = [
     ```
     omp.taskloop.context <clauses> {
       // task-local stack allocations can go here
-      omp.taskloop <clauses> {
+      omp.taskloop {
         omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
           %a = load %arrA[%i1, %i2] : memref<?x?xf32>
           %b = load %arrB[%i1, %i2] : memref<?x?xf32>
@@ -1009,27 +1015,63 @@ def TaskloopContextOp : OpenMP_Op<"taskloop.context", traits = [
       omp.terminator
     }
     ```
-  }] # clausesDescription;
 
-  let assemblyFormat = "$region attr-dict";
+    For definitions of "undeferred task", "included task", "final task" and
+    "mergeable task", please check the OpenMP Specification.
+
+    When an `if` clause is present on a taskloop construct, and if the `if`
+    clause expression evaluates to `false`, undeferred tasks are generated. The
+    use of a variable in an `if` clause expression of a taskloop construct
+    causes an implicit reference to the variable in all enclosing constructs.
+  }] # clausesDescription # [{
+    If an `in_reduction` clause is present on the taskloop construct, the
+    behavior is as if each generated task was defined by a task construct on
+    which an `in_reduction` clause with the same reduction operator and list
+    items is present. Thus, the generated tasks are participants of a reduction
+    previously defined by a reduction scoping clause. In this case, accumulator
+    variables are specified in `in_reduction_vars`, symbols referring to
+    reduction declarations in `in_reduction_syms` and `in_reduction_byref`
+    indicates for each reduction variable whether it should be passed by value
+    or by reference.
+
+    If a `reduction` clause is present on the taskloop construct, the behavior
+    is as if a `task_reduction` clause with the same reduction operator and list
+    items was applied to the implicit taskgroup construct enclosing the taskloop
+    construct. The taskloop construct executes as if each generated task was
+    defined by a task construct on which an `in_reduction` clause with the same
+    reduction operator and list items is present. Thus, the generated tasks are
+    participants of the reduction defined by the `task_reduction` clause that
+    was applied to the implicit taskgroup construct.
+  }];
+
+  let builders = [
+    OpBuilder<(ins CArg<"const TaskloopContextOperands &">:$clauses)>
+  ];
+
+  let assemblyFormat = clausesAssemblyFormat # [{
+    custom<InReductionPrivateReductionRegion>(
+        $region, $in_reduction_vars, type($in_reduction_vars),
+        $in_reduction_byref, $in_reduction_syms, $private_vars,
+        type($private_vars), $private_syms, $private_needs_barrier,
+        $reduction_mod, $reduction_vars, type($reduction_vars),
+        $reduction_byref, $reduction_syms) attr-dict
+  }];
 
   let extraClassDeclaration = [{
     TaskloopOp getLoopOp();
   }] # clausesExtraClassDeclaration;
 
+  let hasVerifier = 1;
   let hasRegionVerifier = 1;
 }
 
 def TaskloopOp : OpenMP_Op<"taskloop", traits = [
-    AttrSizedOperandSegments,
     DeclareOpInterfaceMethods<ComposableOpInterface>,
     DeclareOpInterfaceMethods<LoopWrapperInterface>, NoTerminator,
     RecursiveMemoryEffects, SingleBlock
   ], clauses = [
-    OpenMP_AllocateClause, OpenMP_FinalClause, OpenMP_GrainsizeClause,
-    OpenMP_IfClause, OpenMP_InReductionClause, OpenMP_MergeableClause,
-    OpenMP_NogroupClause, OpenMP_NumTasksClause, OpenMP_PriorityClause,
-    OpenMP_PrivateClause, OpenMP_ReductionClause, OpenMP_UntiedClause
+    // No clauses here: they are attached to omp.taskloop.context, which
+    // generates the runtime calls and the outlined task function.
   ], singleRegion = true> {
   let summary = "taskloop construct";
   let description = [{
@@ -1049,7 +1091,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", traits = [
 
     ```
     omp.taskloop.context <clauses> {
-      omp.taskloop <clauses> {
+      omp.taskloop {
         omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
           %a = load %arrA[%i1, %i2] : memref<?x?xf32>
           %b = load %arrB[%i1, %i2] : memref<?x?xf32>
@@ -1061,47 +1103,13 @@ def TaskloopOp : OpenMP_Op<"taskloop", traits = [
       omp.terminator
     }
     ```
-
-    For definitions of "undeferred task", "included task", "final task" and
-    "mergeable task", please check OpenMP Specification.
-
-    When an `if` clause is present on a taskloop construct, and if the `if`
-    clause expression evaluates to `false`, undeferred tasks are generated. The
-    use of a variable in an `if` clause expression of a taskloop construct
-    causes an implicit reference to the variable in all enclosing constructs.
-  }] # clausesDescription # [{
-    If an `in_reduction` clause is present on the taskloop construct, the
-    behavior is as if each generated task was defined by a task construct on
-    which an `in_reduction` clause with the same reduction operator and list
-    items is present. Thus, the generated tasks are participants of a reduction
-    previously defined by a reduction scoping clause. In this case, accumulator
-    variables are specified in `in_reduction_vars`, symbols referring to
-    reduction declarations in `in_reduction_syms` and `in_reduction_byref`
-    indicate for each reduction variable whether it should be passed by value or
-    by reference.
-
-    If a `reduction` clause is present on the taskloop construct, the behavior
-    is as if a `task_reduction` clause with the same reduction operator and list
-    items was applied to the implicit taskgroup construct enclosing the taskloop
-    construct. The taskloop construct executes as if each generated task was
-    defined by a task construct on which an `in_reduction` clause with the same
-    reduction operator and list items is present. Thus, the generated tasks are
-    participants of the reduction defined by the `task_reduction` clause that
-    was applied to the implicit taskgroup construct.
-  }];
+  }] # clausesDescription;
 
   let builders = [
     OpBuilder<(ins CArg<"const TaskloopOperands &">:$clauses)>
   ];
 
-  let assemblyFormat = clausesAssemblyFormat # [{
-    custom<InReductionPrivateReductionRegion>(
-        $region, $in_reduction_vars, type($in_reduction_vars),
-        $in_reduction_byref, $in_reduction_syms, $private_vars,
-        type($private_vars), $private_syms, $private_needs_barrier,
-        $reduction_mod, $reduction_vars, type($reduction_vars),
-        $reduction_byref, $reduction_syms) attr-dict
-  }];
+  let assemblyFormat = "$region attr-dict";
 
   let extraClassDeclaration = [{
     TaskloopContextOp getContextOp();
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 25fe7bded5cc8..39ded918d25a9 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3236,36 +3236,10 @@ LogicalResult TaskgroupOp::verify() {
 // TaskloopContextOp
 //===----------------------------------------------------------------------===//
 
-TaskloopOp TaskloopContextOp::getLoopOp() {
-  for (mlir::Operation &op : getRegion().front())
-    if (auto taskloopOp = dyn_cast<TaskloopOp>(&op))
-      return taskloopOp;
-  return nullptr;
-}
-
-LogicalResult TaskloopContextOp::verifyRegions() {
-  Region &region = getRegion();
-  if (region.empty())
-    return emitOpError() << "expected non-empty region";
-
-  auto count = llvm::count_if(
-      region.front(), [](mlir::Operation &op) { return isa<TaskloopOp>(op); });
-  if (count != 1)
-    return emitOpError() << "expected exactly 1 TaskloopOp directly nested in "
-                            "the region, but "
-                         << count << " were found";
-
-  return success();
-}
-
-//===----------------------------------------------------------------------===//
-// TaskloopOp
-//===----------------------------------------------------------------------===//
-
-void TaskloopOp::build(OpBuilder &builder, OperationState &state,
-                       const TaskloopOperands &clauses) {
+void TaskloopContextOp::build(OpBuilder &builder, OperationState &state,
+                              const TaskloopContextOperands &clauses) {
   MLIRContext *ctx = builder.getContext();
-  TaskloopOp::build(
+  TaskloopContextOp::build(
       builder, state, clauses.allocateVars, clauses.allocatorVars,
       clauses.final, clauses.grainsizeMod, clauses.grainsize, clauses.ifExpr,
       clauses.inReductionVars,
@@ -3279,15 +3253,14 @@ void TaskloopOp::build(OpBuilder &builder, OperationState &state,
       makeArrayAttr(ctx, clauses.reductionSyms), clauses.untied);
 }
 
-TaskloopContextOp TaskloopOp::getContextOp() {
-  return getOperation()->getParentOfType<TaskloopContextOp>();
+TaskloopOp TaskloopContextOp::getLoopOp() {
+  for (mlir::Operation &op : getRegion().front())
+    if (auto taskloopOp = dyn_cast<TaskloopOp>(&op))
+      return taskloopOp;
+  return nullptr;
 }
 
-LogicalResult TaskloopOp::verify() {
-  TaskloopContextOp context = getContextOp();
-  if (!context)
-    return emitOpError() << "expected to be nested in a taskloop context op";
-
+LogicalResult TaskloopContextOp::verify() {
   if (getAllocateVars().size() != getAllocatorVars().size())
     return emitError(
         "expected equal sizes for allocate and allocator variables");
@@ -3316,6 +3289,41 @@ LogicalResult TaskloopOp::verify() {
   return success();
 }
 
+LogicalResult TaskloopContextOp::verifyRegions() {
+  Region &region = getRegion();
+  if (region.empty())
+    return emitOpError() << "expected non-empty region";
+
+  auto count = llvm::count_if(
+      region.front(), [](mlir::Operation &op) { return isa<TaskloopOp>(op); });
+  if (count != 1)
+    return emitOpError() << "expected exactly 1 TaskloopOp directly nested in "
+                            "the region, but "
+                         << count << " were found";
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// TaskloopOp
+//===----------------------------------------------------------------------===//
+
+void TaskloopOp::build(OpBuilder &builder, OperationState &state,
+                       const TaskloopOperands &clauses) {
+  TaskloopOp::build(builder, state);
+}
+
+TaskloopContextOp TaskloopOp::getContextOp() {
+  return getOperation()->getParentOfType<TaskloopContextOp>();
+}
+
+LogicalResult TaskloopOp::verify() {
+  TaskloopContextOp context = getContextOp();
+  if (!context)
+    return emitOpError() << "expected to be nested in a taskloop context op";
+  return success();
+}
+
 LogicalResult TaskloopOp::verifyRegions() {
   if (LoopWrapperInterface nested = getNestedWrapper()) {
     if (!isComposite())
diff --git a/mlir/lib/Dialect/OpenMP/Transforms/MarkDeclareTarget.cpp b/mlir/lib/Dialect/OpenMP/Transforms/MarkDeclareTarget.cpp
index 18a36f73edaf2..2bfa786f53ea3 100644
--- a/mlir/lib/Dialect/OpenMP/Transforms/MarkDeclareTarget.cpp
+++ b/mlir/lib/Dialect/OpenMP/Transforms/MarkDeclareTarget.cpp
@@ -101,7 +101,7 @@ class MarkDeclareTargetPass
         .Case([&](omp::TaskgroupOp op) {
           processReductionRefs(op.getTaskReductionSyms(), parentInfo, visited);
         })
-        .Case([&](omp::TaskloopOp op) {
+        .Case([&](omp::TaskloopContextOp op) {
           processReductionRefs(op.getReductionSyms(), parentInfo, visited);
           processReductionRefs(op.getInReductionSyms(), parentInfo, visited);
         })
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 84eb0fcde98c0..e99277fdeb022 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -355,7 +355,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
       result = todo("privatization");
   };
   auto checkReduction = [&todo](auto op, LogicalResult &result) {
-    if (isa<omp::TeamsOp>(op) || isa<omp::TaskloopOp>(op))
+    if (isa<omp::TeamsOp>(op) || isa<omp::TaskloopContextOp>(op))
       if (!op.getReductionVars().empty() || op.getReductionByref() ||
           op.getReductionSyms())
         result = todo("reduction");
@@ -416,9 +416,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkNowait(op, result);
       })
       .Case([&](omp::TaskloopContextOp op) {
-        // TODO: move clauses from TaskloopOp to here
-      })
-      .Case([&](omp::TaskloopOp op) {
         checkAllocate(op, result);
         checkInReduction(op, result);
         checkReduction(op, result);
@@ -2876,7 +2873,7 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
   // It stores the pointer of allocated firstprivate copies,
   // which can be used later for freeing the allocated space.
   SmallVector<llvm::Value *> llvmFirstPrivateVars;
-  PrivateVarsInfo privateVarsInfo(taskloopOp);
+  PrivateVarsInfo privateVarsInfo(contextOp);
   TaskContextStructManager taskStructMgr{builder, moduleTranslation,
                                          privateVarsInfo.privatizers};
 
@@ -2922,7 +2919,7 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
         initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
                        blockArg, llvmPrivateVarAlloc, initBlock);
     if (!privateVarOrErr)
-      return handleError(privateVarOrErr, *taskloopOp.getOperation());
+      return handleError(privateVarOrErr, *contextOp.getOperation());
 
     llvmFirstPrivateVars[i] = privateVarOrErr.get();
 
@@ -2943,9 +2940,9 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
   // firstprivate copy region
   setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
   if (failed(copyFirstPrivateVars(
-          taskloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
+          contextOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
           taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
-          taskloopOp.getPrivateNeedsBarrier())))
+          contextOp.getPrivateNeedsBarrier())))
     return llvm::failure();
 
   // Set up inserttion point for call to createTaskloop()
@@ -3033,7 +3030,7 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
     // into the outlined function. When the task is duplicated, that structure
     // is duplicated too.
     if (failed(cleanupPrivateVars(builder, moduleTranslation,
-                                  taskloopOp.getLoc(), privateVarsInfo.llvmVars,
+                                  contextOp.getLoc(), privateVarsInfo.llvmVars,
                                   privateVarsInfo.privatizers)))
       return llvm::make_error<PreviouslyReportedError>();
     // Similarly, the task context structure freed inside the task is the
@@ -3108,10 +3105,10 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
       // through a stack allocated structure.
     }
 
-    if (failed(copyFirstPrivateVars(taskloopOp.getOperation(), builder,
+    if (failed(copyFirstPrivateVars(contextOp.getOperation(), builder,
                                     moduleTranslation, srcGEPs, destGEPs,
                                     privateVarsInfo.privatizers,
-                                    taskloopOp.getPrivateNeedsBarrier())))
+                                    contextOp.getPrivateNeedsBarrier())))
       return llvm::make_error<PreviouslyReportedError>();
 
     return builder.saveIP();
@@ -3206,9 +3203,9 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
   llvm::Value *ifCond = nullptr;
   llvm::Value *grainsize = nullptr;
   int sched = 0; // default
-  mlir::Value grainsizeVal = taskloopOp.getGrainsize();
-  mlir::Value numTasksVal = taskloopOp.getNumTasks();
-  if (Value ifVar = taskloopOp.getIfExpr())
+  mlir::Value grainsizeVal = contextOp.getGrainsize();
+  mlir::Value numTasksVal = contextOp.getNumTasks();
+  if (Value ifVar = contextOp.getIfExpr())
     ifCond = moduleTranslation.lookupValue(ifVar);
   if (grainsizeVal) {
     grainsize = moduleTranslation.lookupValue(grainsizeVal);
@@ -3229,17 +3226,17 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
   // task's cleanup block which should be branched to. It doesn't depend upon
   // nogroup because even in that case the taskloop might still be inside an
   // explicit taskgroup.
-  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, taskloopOp,
+  pushCancelFinalizationCB(cancelTerminators, builder, ompBuilder, contextOp,
                            llvm::omp::Directive::OMPD_taskgroup);
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createTaskloop(
           ompLoc, allocaIP, bodyCB, loopInfo, lbVal, ubVal, stepVal,
-          taskloopOp.getUntied(), ifCond, grainsize, taskloopOp.getNogroup(),
-          sched, moduleTranslation.lookupValue(taskloopOp.getFinal()),
-          taskloopOp.getMergeable(),
-          moduleTranslation.lookupValue(taskloopOp.getPriority()),
+          contextOp.getUntied(), ifCond, grainsize, contextOp.getNogroup(),
+          sched, moduleTranslation.lookupValue(contextOp.getFinal()),
+          contextOp.getMergeable(),
+          moduleTranslation.lookupValue(contextOp.getPriority()),
           loopOp.getCollapseNumLoops(), taskDupOrNull,
           taskStructMgr.getStructPtr());
 
diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index 0eeb2f8e2bd6d..b5df004041a78 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -588,10 +588,10 @@ func.func @omp_ordered(%arg0 : index) -> () {
 func.func @omp_taskloop(%arg0: index, %arg1 : memref<i32>) {
   // CHECK: omp.parallel {
   omp.parallel {
-    // CHECK: omp.taskloop.context {
-    omp.taskloop.context {
-      // CHECK: omp.taskloop allocate(%{{.*}} : !llvm.struct<(ptr, ptr, i64)> -> %{{.*}} : !llvm.struct<(ptr, ptr, i64)>) {
-      omp.taskloop allocate(%arg1 : memref<i32> -> %arg1 : memref<i32>) {
+    // CHECK: omp.taskloop.context allocate(%{{.*}} : !llvm.struct<(ptr, ptr, i64)> -> %{{.*}} : !llvm.struct<(ptr, ptr, i64)>) {
+    omp.taskloop.context allocate(%arg1 : memref<i32> -> %arg1 : memref<i32>) {
+      // CHECK: omp.taskloop {
+      omp.taskloop {
         // CHECK: omp.loop_nest (%[[IV:.*]]) : i64 = (%[[ARG0]]) to (%[[ARG0]]) step (%[[ARG0]]) {
         omp.loop_nest (%iv) : index = (%arg0) to (%arg0) step (%arg0) {
           // CHECK-DAG: %[[CAST_IV:.*]] = builtin.unrealized_conversion_cast %[[IV]] : i64 to index
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index fd0c845ada12c..c543b474b78e2 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2030,8 +2030,8 @@ combiner {
 
 func.func @scan_test_2(%lb: i32, %ub: i32, %step: i32) {
   %test1f32 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop.context {
-    omp.taskloop reduction(mod:inscan, @add_f32 %test1f32 -> %arg1 : !llvm.ptr) {
+  omp.taskloop.context reduction(mod:inscan, @add_f32 %test1f32 -> %arg1 : !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
   // expected-error @below {{SCAN directive needs to be enclosed within a parent worksharing loop construct or SIMD construct with INSCAN reduction modifier}}
          omp.scan inclusive(%test1f32 : !llvm.ptr)
@@ -2047,15 +2047,15 @@ func.func @scan_test_2(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testmemref = "test.memref"() : () -> (memref<i32>)
-  omp.taskloop.context {
-    // expected-error @below {{expected equal sizes for allocate and allocator variables}}
-    "omp.taskloop"(%testmemref) ({
+  // expected-error @below {{expected equal sizes for allocate and allocator variables}}
+  "omp.taskloop.context"(%testmemref) ({
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
-    }) {operandSegmentSizes = array<i32: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0>} : (memref<i32>) -> ()
+    }
     omp.terminator
-  }
+  }) {operandSegmentSizes = array<i32: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0>} : (memref<i32>) -> ()
   return
 }
 
@@ -2064,16 +2064,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop.context {
-    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-    "omp.taskloop"(%testf32, %testf32_2) ({
-    ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop.context"(%testf32, %testf32_2) ({
+  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
-    }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 2>, reduction_syms = [@add_f32]} : (!llvm.ptr, !llvm.ptr) -> ()
+    }
     omp.terminator
-  }
+  }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 2>, reduction_syms = [@add_f32]} : (!llvm.ptr, !llvm.ptr) -> ()
   return
 }
 
@@ -2081,16 +2081,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop.context {
-    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-    "omp.taskloop"(%testf32) ({
-    ^bb0(%arg0: !llvm.ptr):
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop.context"(%testf32) ({
+  ^bb0(%arg0: !llvm.ptr):
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
-    }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>, reduction_syms = [@add_f32, @add_f32]} : (!llvm.ptr) -> ()
+    }
     omp.terminator
-  }
+  }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>, reduction_syms = [@add_f32, @add_f32]} : (!llvm.ptr) -> ()
   return
 }
 
@@ -2099,16 +2099,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop.context {
-    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-    "omp.taskloop"(%testf32, %testf32_2) ({
-    ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop.context"(%testf32, %testf32_2) ({
+  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
-    }) {in_reduction_syms = [@add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 2, 0, 0, 0, 0>} : (!llvm.ptr, !llvm.ptr) -> ()
+    }
     omp.terminator
-  }
+  }) {in_reduction_syms = [@add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 2, 0, 0, 0, 0>} : (!llvm.ptr, !llvm.ptr) -> ()
   return
 }
 
@@ -2116,16 +2116,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop.context {
-    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-    "omp.taskloop"(%testf32) ({
-    ^bb0(%arg0: !llvm.ptr):
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop.context"(%testf32) ({
+  ^bb0(%arg0: !llvm.ptr):
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
-    }) {in_reduction_syms = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 1, 0, 0, 0, 0>} : (!llvm.ptr) -> ()
+    }
     omp.terminator
-  }
+  }) {in_reduction_syms = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 1, 0, 0, 0, 0>} : (!llvm.ptr) -> ()
   return
 }
 
@@ -2146,9 +2146,9 @@ combiner {
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop.context {
-    // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
-    omp.taskloop nogroup reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
+  omp.taskloop.context nogroup reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
@@ -2174,9 +2174,9 @@ combiner {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop.context {
-    // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
-    omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32 -> %arg1 : !llvm.ptr) {
+  // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
+  omp.taskloop.context in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32 -> %arg1 : !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
@@ -2190,9 +2190,9 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testi64 = "test.i64"() : () -> (i64)
-  omp.taskloop.context {
-    // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
-    omp.taskloop grainsize(%testi64: i64) num_tasks(%testi64: i64) {
+  // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
+  omp.taskloop.context grainsize(%testi64: i64) num_tasks(%testi64: i64) {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
@@ -2206,9 +2206,9 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testi64 = "test.i64"() : () -> (i64)
-  omp.taskloop.context {
-    // expected-error @below {{invalid grainsize modifier : 'strict1'}}
-    omp.taskloop grainsize(strict1, %testi64: i64) {
+  // expected-error @below {{invalid grainsize modifier : 'strict1'}}
+  omp.taskloop.context grainsize(strict1, %testi64: i64) {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
@@ -2221,9 +2221,9 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testi64 = "test.i64"() : () -> (i64)
-  omp.taskloop.context {
-    // expected-error @below {{invalid num_tasks modifier : 'default'}}
-    omp.taskloop num_tasks(default, %testi64: i64) {
+  // expected-error @below {{invalid num_tasks modifier : 'default'}}
+  omp.taskloop.context num_tasks(default, %testi64: i64) {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         omp.yield
       }
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 4dd15b2d5c277..6ac804b6d51b2 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2580,10 +2580,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
 
   %testbool = "test.bool"() : () -> (i1)
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop if(%{{[^)]+}}) {
-    omp.taskloop if(%testbool) {
+  // CHECK: omp.taskloop.context if(%{{[^)]+}}) {
+  omp.taskloop.context if(%testbool) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2592,10 +2592,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop final(%{{[^)]+}}) {
-    omp.taskloop final(%testbool) {
+  // CHECK: omp.taskloop.context final(%{{[^)]+}}) {
+  omp.taskloop.context final(%testbool) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2604,10 +2604,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop untied {
-    omp.taskloop untied {
+  // CHECK: omp.taskloop.context untied {
+  omp.taskloop.context untied {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2616,10 +2616,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop mergeable {
-    omp.taskloop mergeable {
+  // CHECK: omp.taskloop.context mergeable {
+  omp.taskloop.context mergeable {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2630,10 +2630,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
 
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop in_reduction(@add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-    omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  // CHECK: omp.taskloop.context in_reduction(@add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context in_reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2643,10 +2643,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
   }
 
   // Checking byref attribute for in_reduction
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop in_reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-    omp.taskloop in_reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  // CHECK: omp.taskloop.context in_reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context in_reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2655,10 +2655,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-    omp.taskloop reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  // CHECK: omp.taskloop.context reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2668,10 +2668,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
   }
 
   // check byref attrbute for reduction
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop reduction(byref @add_f32 %{{.+}} -> %{{.+}}, byref @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-    omp.taskloop reduction(byref @add_f32 %testf32 -> %arg0, byref @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  // CHECK: omp.taskloop.context reduction(byref @add_f32 %{{.+}} -> %{{.+}}, byref @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context reduction(byref @add_f32 %testf32 -> %arg0, byref @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2680,10 +2680,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop in_reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) {
-    omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32_2 -> %arg1 : !llvm.ptr) {
+  // CHECK: omp.taskloop.context in_reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) {
+  omp.taskloop.context in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32_2 -> %arg1 : !llvm.ptr) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2693,10 +2693,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
   }
 
   %testi32 = "test.i32"() : () -> (i32)
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop priority(%{{[^:]+}}: i32) {
-    omp.taskloop priority(%testi32 : i32) {
+  // CHECK: omp.taskloop.context priority(%{{[^:]+}}: i32) {
+  omp.taskloop.context priority(%testi32 : i32) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2706,10 +2706,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
   }
 
   %testmemref = "test.memref"() : () -> (memref<i32>)
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>) {
-    omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>) {
+  // CHECK: omp.taskloop.context allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>) {
+  omp.taskloop.context allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2719,10 +2719,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
   }
 
   %testi64 = "test.i64"() : () -> (i64)
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop grainsize(%{{[^:]+}}: i64) {
-    omp.taskloop grainsize(%testi64: i64) {
+  // CHECK: omp.taskloop.context grainsize(%{{[^:]+}}: i64) {
+  omp.taskloop.context grainsize(%testi64: i64) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2731,10 +2731,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) {
-    omp.taskloop num_tasks(%testi64: i64) {
+  // CHECK: omp.taskloop.context num_tasks(%{{[^:]+}}: i64) {
+  omp.taskloop.context num_tasks(%testi64: i64) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2743,10 +2743,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop grainsize(strict, %{{[^:]+}}: i64) {
-    omp.taskloop grainsize(strict, %testi64: i64) {
+  // CHECK: omp.taskloop.context grainsize(strict, %{{[^:]+}}: i64) {
+  omp.taskloop.context grainsize(strict, %testi64: i64) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2755,10 +2755,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop num_tasks(strict, %{{[^:]+}}: i64) {
-    omp.taskloop num_tasks(strict, %testi64: i64) {
+  // CHECK: omp.taskloop.context num_tasks(strict, %{{[^:]+}}: i64) {
+  omp.taskloop.context num_tasks(strict, %testi64: i64) {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
@@ -2767,10 +2767,10 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.taskloop.context {
-  omp.taskloop.context {
-    // CHECK: omp.taskloop nogroup {
-    omp.taskloop nogroup {
+  // CHECK: omp.taskloop.context nogroup {
+  omp.taskloop.context nogroup {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
index 8ff9c6cdb11c5..49bdd77226fe4 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
@@ -21,8 +21,8 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
   %1 = llvm.mlir.constant(100 : i32) : i32
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
         llvm.store %arg3, %arg2 : i32, !llvm.ptr
         llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
@@ -153,8 +153,8 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
   omp.taskgroup {
-    omp.taskloop.context {
-      omp.taskloop nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop.context nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+      omp.taskloop {
         omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
           llvm.store %arg3, %arg2 : i32, !llvm.ptr
           llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
@@ -293,8 +293,8 @@ llvm.func @_QPtest3(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
   %1 = llvm.mlir.constant(100 : i32) : i32
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
         llvm.store %arg3, %arg2 : i32, !llvm.ptr
         llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
index 0dea869c1bbba..8a4181ad924d7 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
@@ -21,8 +21,8 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
   %1 = llvm.mlir.constant(100 : i32) : i32
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
         llvm.store %arg3, %arg2 : i32, !llvm.ptr
         llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
@@ -153,8 +153,8 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
   omp.taskgroup {
-    omp.taskloop.context {
-      omp.taskloop nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop.context nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+      omp.taskloop {
         omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
           llvm.store %arg3, %arg2 : i32, !llvm.ptr
           llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
index 406245741388b..a8c17a29fb7b3 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
@@ -20,8 +20,8 @@ llvm.func @_QPtest() {
   %c1_i32 = llvm.mlir.constant(1 :i32) : i32
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2, %arg3) : i32 = (%c1_i32, %c1_i32) to (%c10_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32) collapse(2) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
@@ -73,8 +73,8 @@ llvm.func @_QPtest2() {
   %c2_i32 = llvm.mlir.constant(2 : i32) : i32
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2, %arg3, %arg4) : i32 = (%c1_i32, %c1_i32, %c2_i32) to (%c10_i32, %c5_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32, %c1_i32) collapse(3) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
@@ -131,8 +131,8 @@ llvm.func @_QPtest3() {
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
   %c20_i32 = llvm.mlir.constant(20 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2, %arg3) : i32 = (%c10_i32, %c1_i32) to (%c20_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32) collapse(2) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
@@ -189,8 +189,8 @@ llvm.func @_QPtest4() {
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
   %c15_i32 = llvm.mlir.constant(15 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2, %arg3) : i32 = (%c2_i32, %c5_i32) to (%c10_i32, %c15_i32) inclusive step (%c2_i32, %c3_i32) collapse(2) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
@@ -249,8 +249,8 @@ llvm.func @_QPtest5() {
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
   %c15_i32 = llvm.mlir.constant(15 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2, %arg3) : i32 = (%cneg2_i32, %c5_i32) to (%c10_i32, %c15_i32) inclusive step (%c2_i32, %c3_i32) collapse(2) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
@@ -305,8 +305,8 @@ llvm.func @_QPtest6() {
   %c1_i32 = llvm.mlir.constant(1 :i32) : i32
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2, %arg3) : i32 = (%c10_i32, %c1_i32) to (%c5_i32, %c5_i32) inclusive step (%cneg1_i32, %c1_i32) collapse(2) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir
index 1685d0065a102..f73a30a5e664d 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir
@@ -14,10 +14,10 @@ llvm.func @_QPtest_taskloop(%arg0: !llvm.ptr) {
   %2 = llvm.mlir.constant(100 : i32) : i32
   %3 = llvm.mlir.constant(1 : i64) : i64
   %4 = llvm.alloca %3 x i32 : (i64) -> !llvm.ptr
-  omp.taskloop.context {
+  omp.taskloop.context private(@_QFtest_taskloopEi_private_i32 %4 -> %arg1 : !llvm.ptr) {
     // test where this alloca ends up
     %5 = llvm.alloca %1 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
-    omp.taskloop private(@_QFtest_taskloopEi_private_i32 %4 -> %arg1 : !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%1) to (%2) inclusive step (%1) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         "llvm.intr.memcpy"(%5, %arg0, %0) <{arg_attrs = [{llvm.align = 8 : i64}, {llvm.align = 8 : i64}, {}], isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
index d06e110ce0631..d5d962df33a45 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
@@ -20,8 +20,8 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %true = llvm.mlir.constant(true) : i1
-  omp.taskloop.context {
-    omp.taskloop final(%true) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context final(%true) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
index ca0f93a95d33d..ba96b59c246c5 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
@@ -20,8 +20,8 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %c2_i32 = llvm.mlir.constant(2 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop grainsize(%c2_i32: i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context grainsize(%c2_i32: i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
index 08644531dff0a..63eb721674b6c 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
@@ -22,8 +22,8 @@ llvm.func @_QPtest() {
   %a_val = llvm.load %3 : !llvm.ptr -> i32
   %c20 = llvm.mlir.constant(20 : i32) : i32
   %cmp = llvm.icmp "slt" %a_val, %c20 : i32
-  omp.taskloop.context {
-    omp.taskloop if(%cmp) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context if(%cmp) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
index 0b94d6d7704d5..d04add975badb 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
@@ -19,8 +19,8 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop mergeable private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context mergeable private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index 49878de219195..d3f9015f72a9e 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -13,8 +13,8 @@ llvm.func @_QPtest() {
   %3 = llvm.alloca %2 x i32 {bindc_name = "t2"} : (i64) -> !llvm.ptr
   %4 = llvm.alloca %2 x i32 {bindc_name = "t1"} : (i64) -> !llvm.ptr
   %5 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEt1_private_i32 %4 -> %arg0, @_QFtestEt2_private_i32 %3 -> %arg1, @_QFtestEi_private_i32 %5 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEt1_private_i32 %4 -> %arg0, @_QFtestEt2_private_i32 %3 -> %arg1, @_QFtestEi_private_i32 %5 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
         llvm.store %arg3, %arg2 : i32, !llvm.ptr
         omp.yield
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
index 18b9612360238..226831e8fed91 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
@@ -19,8 +19,8 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop nogroup private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context nogroup private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
index 63bb8fcce172d..23c8bcbe3a96d 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
@@ -20,8 +20,8 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %c2_i32 = llvm.mlir.constant(2: i32) : i32
-  omp.taskloop.context {
-    omp.taskloop num_tasks(%c2_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context num_tasks(%c2_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
index fb92a66086e8f..875b96a6be1d7 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
@@ -20,8 +20,8 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %c1_i32 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop priority(%c1_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context priority(%c1_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
index 2b44d3aceaa65..d13ea3e69de80 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
@@ -19,8 +19,8 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop untied private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context untied private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
@@ -49,8 +49,8 @@ llvm.func @_QPtest_tied() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
index eb0521e07b299..d2bb2fabcbbd3 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
@@ -19,8 +19,8 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop.context {
-    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+  omp.taskloop.context private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
         llvm.store %arg2, %arg1 : i32, !llvm.ptr
         %10 = llvm.load %arg0 : !llvm.ptr -> i32
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 6531a17776561..79fdacc199c4f 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -302,10 +302,9 @@ llvm.func @taskgroup_task_reduction(%x : !llvm.ptr) {
 
 llvm.func @taskloop_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
   // expected-error@below {{LLVM Translation failed for operation: omp.taskloop.context}}
-  omp.taskloop.context {
-    // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.taskloop operation}}
-    // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
-    omp.taskloop allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.taskloop.context operation}}
+  omp.taskloop.context allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
         omp.yield
       }
@@ -328,10 +327,9 @@ llvm.func @taskloop_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr)
 
 llvm.func @taskloop_inreduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
   // expected-error@below {{LLVM Translation failed for operation: omp.taskloop.context}}
-  omp.taskloop.context {
-    // expected-error@below {{not yet implemented: Unhandled clause in_reduction in omp.taskloop operation}}
-    // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
-    omp.taskloop in_reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause in_reduction in omp.taskloop.context operation}}
+  omp.taskloop.context in_reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
         omp.yield
       }
@@ -354,10 +352,9 @@ llvm.func @taskloop_inreduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.pt
 
 llvm.func @taskloop_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
   // expected-error@below {{LLVM Translation failed for operation: omp.taskloop.context}}
-  omp.taskloop.context {
-    // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.taskloop operation}}
-    // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
-    omp.taskloop reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.taskloop.context operation}}
+  omp.taskloop.context reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
+    omp.taskloop {
       omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
         omp.yield
       }

>From 7a89602583d31dabcfe3a3af2a3495d0d699eaf4 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 24 Mar 2026 11:51:56 +0000
Subject: [PATCH 2/2] Address review comments: mark unused param and move var
 decl

- Mark the unused 'clauses' parameter in TaskloopOp::build with
  [[maybe_unused]]
- Move the declaration of 'wrapperClauseOps' in genStandaloneTaskloop
  to immediately before its first use

Assisted-by: Copilot, Claude Sonnet 4.6
---
 flang/lib/Lower/OpenMP/OpenMP.cpp            | 2 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ba3a967bf6986..df66c8833394b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3251,7 +3251,6 @@ static mlir::omp::TaskloopContextOp genStandaloneTaskloop(
     lower::pft::Evaluation &eval, mlir::Location loc,
     const ConstructQueue &queue, ConstructQueue::const_iterator item) {
   mlir::omp::TaskloopContextOperands taskloopClauseOps;
-  mlir::omp::TaskloopOperands wrapperClauseOps;
   llvm::SmallVector<const semantics::Symbol *> reductionSyms;
   llvm::SmallVector<const semantics::Symbol *> inReductionSyms;
 
@@ -3283,6 +3282,7 @@ static mlir::omp::TaskloopContextOp genStandaloneTaskloop(
 
   mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
   firOpBuilder.setInsertionPointToStart(&taskLoopContextOp.getRegion().front());
+  mlir::omp::TaskloopOperands wrapperClauseOps;
   auto taskLoopOp = genWrapperOp<mlir::omp::TaskloopOp>(
       converter, loc, wrapperClauseOps, taskloopArgs);
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 39ded918d25a9..b6a6a4ed00310 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3309,7 +3309,7 @@ LogicalResult TaskloopContextOp::verifyRegions() {
 //===----------------------------------------------------------------------===//
 
 void TaskloopOp::build(OpBuilder &builder, OperationState &state,
-                       const TaskloopOperands &clauses) {
+                       [[maybe_unused]] const TaskloopOperands &clauses) {
   TaskloopOp::build(builder, state);
 }
 



More information about the llvm-branch-commits mailing list