[Mlir-commits] [mlir] f59009d - [mlir][OpenMP] Separate OutlinableInterface from taskloop LoopWrapper (#188068)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Tue Apr 7 09:46:05 PDT 2026


Author: Tom Eccles
Date: 2026-04-07T16:45:59Z
New Revision: f59009d612c1e23e517d77293fd76855047ace5c

URL: https://github.com/llvm/llvm-project/commit/f59009d612c1e23e517d77293fd76855047ace5c
DIFF: https://github.com/llvm/llvm-project/commit/f59009d612c1e23e517d77293fd76855047ace5c.diff

LOG: [mlir][OpenMP] Separate OutlinableInterface from taskloop LoopWrapper (#188068)

Separate taskloop context and loop lowering into different operations.
This allows us to have separate operations representing the outlinable
interface and the loop wrapper interface so that there is somewhere
better than the loop body to put task-local allocations:

```
omp.taskloop.context {
  llvm.alloca ...
  omp.taskloop {
    omp.loop_nest ... {
      ...
    }
  }
  omp.terminator
}
```

The tests for the real functional change are
  - flang/test/Integration/OpenMP/taskloop-alloca-placement.f90
  - mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir

RFC:
https://discourse.llvm.org/t/rfc-openmp-alloca-placement-for-openmp-loop-wrappers/89512/7

This commit keeps all of the clauses on the omp.taskloop op to minimise
the changes that have to go into this one commit. Follow-up patches will
move each clause to the operation in which it generates code.

Patch 1/3

Added: 
    flang/test/Integration/OpenMP/taskloop-alloca-placement.f90
    flang/test/Lower/OpenMP/taskloop-inreduction.f90
    flang/test/Lower/OpenMP/taskloop-reduction.f90
    mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir

Modified: 
    flang/lib/Lower/OpenMP/OpenMP.cpp
    flang/test/Lower/OpenMP/if-clause.f90
    flang/test/Lower/OpenMP/implicit-dsa.f90
    flang/test/Lower/OpenMP/masked_taskloop.f90
    flang/test/Lower/OpenMP/parallel-masked-taskloop.f90
    flang/test/Lower/OpenMP/taskloop-cancel.f90
    flang/test/Lower/OpenMP/taskloop-collapse.f90
    flang/test/Lower/OpenMP/taskloop-grainsize.f90
    flang/test/Lower/OpenMP/taskloop-numtasks.f90
    flang/test/Lower/OpenMP/taskloop.f90
    mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
    mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
    mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
    mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
    mlir/test/Dialect/OpenMP/invalid.mlir
    mlir/test/Dialect/OpenMP/ops.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-bounds-cast.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
    mlir/test/Target/LLVMIR/openmp-taskloop.mlir
    mlir/test/Target/LLVMIR/openmp-todo.mlir

Removed: 
    flang/test/Lower/taskloop-inreduction.f90
    flang/test/Lower/taskloop-reduction.f90


################################################################################
diff  --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index f86f15921b05e..4cf8774403977 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3260,7 +3260,7 @@ genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
   return simdOp;
 }
 
-static mlir::omp::TaskloopOp genStandaloneTaskloop(
+static mlir::omp::TaskloopContextOp genStandaloneTaskloop(
     lower::AbstractConverter &converter, lower::SymMap &symTable,
     lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx,
     lower::pft::Evaluation &eval, mlir::Location loc,
@@ -3289,13 +3289,23 @@ static mlir::omp::TaskloopOp genStandaloneTaskloop(
   taskloopArgs.inReduction.syms = inReductionSyms;
   taskloopArgs.inReduction.vars = taskloopClauseOps.inReductionVars;
 
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+  auto taskLoopContextOp =
+      mlir::omp::TaskloopContextOp::create(firOpBuilder, loc);
+
+  mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
+  firOpBuilder.createBlock(&taskLoopContextOp.getRegion());
+  firOpBuilder.setInsertionPointToStart(&taskLoopContextOp.getRegion().front());
   auto taskLoopOp = genWrapperOp<mlir::omp::TaskloopOp>(
       converter, loc, taskloopClauseOps, taskloopArgs);
 
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                 loopNestClauseOps, iv, {{taskLoopOp, taskloopArgs}},
                 llvm::omp::Directive::OMPD_taskloop, dsp);
-  return taskLoopOp;
+
+  firOpBuilder.setInsertionPointAfter(taskLoopOp);
+  mlir::omp::TerminatorOp::create(firOpBuilder, loc);
+  return taskLoopContextOp;
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/flang/test/Integration/OpenMP/taskloop-alloca-placement.f90 b/flang/test/Integration/OpenMP/taskloop-alloca-placement.f90
new file mode 100644
index 0000000000000..d6b272cc4dd57
--- /dev/null
+++ b/flang/test/Integration/OpenMP/taskloop-alloca-placement.f90
@@ -0,0 +1,31 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+! This test is to ensure that allocas generated inside the loop body of a
+! taskloop are located correctly inside the omp.taskloop.context but not inside
+! the loop itself.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | tco -test-gen | FileCheck %s
+
+! CHECK-LABEL: llvm.func @_QPtest_taskloop(
+subroutine test_taskloop(lb,ub,step,a)
+  integer :: lb, ub, step
+  integer, allocatable :: a(:)
+
+  !$omp taskloop shared(a)
+! CHECK:         omp.taskloop.context {
+! CHECK:           llvm.alloca
+! CHECK:           omp.taskloop private({{.*}}) {
+  do i = lb,ub,step
+! CHECK-NOT:         llvm.alloca
+    a(i) = i
+  enddo
+  !$omp end taskloop
+! CHECK:             omp.yield
+! CHECK-NEXT:      }
+end subroutine

diff  --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90
index e8f69b470e2ca..869d98813b6bf 100644
--- a/flang/test/Lower/OpenMP/if-clause.f90
+++ b/flang/test/Lower/OpenMP/if-clause.f90
@@ -1584,6 +1584,7 @@ program main
   ! TASKLOOP
   ! ----------------------------------------------------------------------------
 
+  ! CHECK:      omp.taskloop.context
   ! CHECK:      omp.taskloop
   ! CHECK-NOT: if({{.*}})
   !$omp taskloop
@@ -1591,6 +1592,7 @@ program main
   end do
   !$omp end taskloop
 
+  ! CHECK:      omp.taskloop.context
   ! CHECK:      omp.taskloop
   ! CHECK-SAME: if({{.*}})
   !$omp taskloop if(.true.)
@@ -1598,6 +1600,7 @@ program main
   end do
   !$omp end taskloop
 
+  ! CHECK:      omp.taskloop.context
   ! CHECK:      omp.taskloop
   ! CHECK-SAME: if({{.*}})
   !$omp taskloop if(taskloop: .true.)

diff  --git a/flang/test/Lower/OpenMP/implicit-dsa.f90 b/flang/test/Lower/OpenMP/implicit-dsa.f90
index 9d01460253899..3f1d80bb6303c 100644
--- a/flang/test/Lower/OpenMP/implicit-dsa.f90
+++ b/flang/test/Lower/OpenMP/implicit-dsa.f90
@@ -352,6 +352,7 @@ subroutine implicit_dsa_test7
 ! CHECK:           %[[DECL_Z:.*]]:2 = hlfir.declare %[[ALLOCA_Z]] {uniq_name = "_QFimplicit_dsa_taskloop_test1Ez"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 subroutine implicit_dsa_taskloop_test1
    integer :: x, y, z
+   ! CHECK: omp.taskloop.context {
    ! CHECK: omp.taskloop private(
    ! CHECK-SAME: @[[TASKLOOP_TEST1_Y_PRIVATE]] %[[DECL_Y]]#0 -> %[[ARG0:.*]], @[[TASKLOOP_TEST1_X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG1:.*]], @[[TASKLOOP_TEST1_I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG2:.*]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) {
    ! CHECK: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
@@ -365,6 +366,7 @@ subroutine implicit_dsa_taskloop_test1
    end do
    !$omp end taskloop
 
+   ! CHECK: omp.taskloop.context {
    ! CHECK: omp.taskloop private(@[[TASKLOOP_TEST1_I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
    !$omp taskloop default(shared)
    do i = 1, 100
@@ -387,6 +389,7 @@ subroutine implicit_dsa_taskloop_test2
    integer :: x
    ! CHECK:   omp.parallel {
    !$omp parallel 
+   ! CHECK:   omp.taskloop.context
    ! CHECK:   omp.taskloop private(@[[TASKLOOP_TEST2_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
    !$omp taskloop
    do i = 1, 100
@@ -395,6 +398,7 @@ subroutine implicit_dsa_taskloop_test2
    end do
    !$omp end taskloop
 
+   ! CHECK: omp.taskloop.context
    ! CHECK: omp.taskloop private(@[[TASKLOOP_TEST2_X_PRIVATE]] %[[X_DECL]]#0 -> %[[ARG0]], @[[TASKLOOP_TEST2_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
    !$omp taskloop private(x)
    do i = 1, 10
@@ -424,6 +428,7 @@ subroutine implicit_dsa_taskloop_test3
    ! CHECK:  omp.parallel private(@[[TASKLOOP_TEST3_X_FIRSTPRIVATE]] %[[X_DECL]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
    ! CHECK:  %[[X_PRIV_VAL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFimplicit_dsa_taskloop_test3Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
    !$omp parallel firstprivate(x)
+   ! CHECK:  omp.taskloop.context
    ! CHECK:  omp.taskloop private(@[[TASKLOOP_TEST3_X_FIRSTPRIVATE]] %[[X_PRIV_VAL]]#0 -> %[[ARG1:.*]], @[[TASKLOOP_TEST3_I_PRIVATE]] %[[I_DECL]]#0 -> %[[ARG2:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
    !$omp taskloop
    ! CHECK:  %[[X_VAL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFimplicit_dsa_taskloop_test3Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)

diff  --git a/flang/test/Lower/OpenMP/masked_taskloop.f90 b/flang/test/Lower/OpenMP/masked_taskloop.f90
index 4ace6fe40016f..5f1a824026069 100644
--- a/flang/test/Lower/OpenMP/masked_taskloop.f90
+++ b/flang/test/Lower/OpenMP/masked_taskloop.f90
@@ -22,18 +22,21 @@
 ! CHECK:            %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:            %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:            %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:            omp.taskloop private(
-! CHECK-SAME:          @[[J_FIRSTPRIVATE]] %[[DECL_J]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
-! CHECK:              omp.loop_nest (%arg2) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
-! CHECK:                %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFtest_masked_taskloopEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:                %[[VAL2:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFtest_masked_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:                hlfir.assign %arg2 to %[[VAL2]]#0 : i32, !fir.ref<i32>
-! CHECK:                %[[LOAD_J:.*]] = fir.load %[[VAL1]]#0 : !fir.ref<i32>
-! CHECK:                %[[C1_I32_1:.*]] = arith.constant 1 : i32
-! CHECK:                %[[RES_J:.*]] = arith.addi %[[LOAD_J]], %[[C1_I32_1]] : i32
-! CHECK:                hlfir.assign %[[RES_J]] to %[[VAL1]]#0 : i32, !fir.ref<i32>
-! CHECK:                omp.yield
+! CHECK:            omp.taskloop.context {
+! CHECK:              omp.taskloop private(
+! CHECK-SAME:            @[[J_FIRSTPRIVATE]] %[[DECL_J]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:                omp.loop_nest (%arg2) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
+! CHECK:                  %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFtest_masked_taskloopEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                  %[[VAL2:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFtest_masked_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                  hlfir.assign %arg2 to %[[VAL2]]#0 : i32, !fir.ref<i32>
+! CHECK:                  %[[LOAD_J:.*]] = fir.load %[[VAL1]]#0 : !fir.ref<i32>
+! CHECK:                  %[[C1_I32_1:.*]] = arith.constant 1 : i32
+! CHECK:                  %[[RES_J:.*]] = arith.addi %[[LOAD_J]], %[[C1_I32_1]] : i32
+! CHECK:                  hlfir.assign %[[RES_J]] to %[[VAL1]]#0 : i32, !fir.ref<i32>
+! CHECK:                  omp.yield
+! CHECK:                }
 ! CHECK:              }
+! CHECK:              omp.terminator
 ! CHECK:            }
 ! CHECK:            omp.terminator
 ! CHECK:          }

diff  --git a/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90 b/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90
index e686d080090e1..eeac5719f895e 100644
--- a/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90
+++ b/flang/test/Lower/OpenMP/parallel-masked-taskloop.f90
@@ -16,16 +16,19 @@
 ! CHECK:              %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:              %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:              %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:              omp.taskloop private(@[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
-! CHECK:                omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%c1_i32_0) {
-! CHECK:                  %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFtest_parallel_master_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:                  hlfir.assign %[[ARG1]] to %[[VAL1]]#0 : i32, !fir.ref<i32>
-! CHECK:                  %[[LOAD_J:.*]] = fir.load %[[DECL_J]]#0 : !fir.ref<i32>
-! CHECK:                  %c1_i32_1 = arith.constant 1 : i32
-! CHECK:                  %[[RES_ADD:.*]] = arith.addi %[[LOAD_J]], %c1_i32_1 : i32
-! CHECK:                  hlfir.assign %[[RES_ADD]] to %[[DECL_J]]#0 : i32, !fir.ref<i32>
-! CHECK:                  omp.yield
+! CHECK:              omp.taskloop.context {
+! CHECK:                omp.taskloop private(@[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+! CHECK:                  omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%c1_i32_0) {
+! CHECK:                    %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFtest_parallel_master_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                    hlfir.assign %[[ARG1]] to %[[VAL1]]#0 : i32, !fir.ref<i32>
+! CHECK:                    %[[LOAD_J:.*]] = fir.load %[[DECL_J]]#0 : !fir.ref<i32>
+! CHECK:                    %c1_i32_1 = arith.constant 1 : i32
+! CHECK:                    %[[RES_ADD:.*]] = arith.addi %[[LOAD_J]], %c1_i32_1 : i32
+! CHECK:                    hlfir.assign %[[RES_ADD]] to %[[DECL_J]]#0 : i32, !fir.ref<i32>
+! CHECK:                    omp.yield
+! CHECK:                  }
 ! CHECK:                }
+! CHECK:                omp.terminator
 ! CHECK:              }
 ! CHECK:              omp.terminator
 ! CHECK:            }

diff  --git a/flang/test/Lower/OpenMP/taskloop-cancel.f90 b/flang/test/Lower/OpenMP/taskloop-cancel.f90
index 710617793c3e7..036d0071d9e80 100644
--- a/flang/test/Lower/OpenMP/taskloop-cancel.f90
+++ b/flang/test/Lower/OpenMP/taskloop-cancel.f90
@@ -12,13 +12,16 @@
 ! CHECK:           %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:           %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:           %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:           omp.taskloop private(@[[I_PRIVATE]] %2#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
-! CHECK:             omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
-! CHECK:               %[[IDX:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:               hlfir.assign %[[ARG1]] to %[[IDX]]#0 : i32, !fir.ref<i32>
-! CHECK:               omp.cancel cancellation_construct_type(taskgroup)
-! CHECK:               omp.yield
+! CHECK:           omp.taskloop.context {
+! CHECK:             omp.taskloop private(@[[I_PRIVATE]] %2#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+! CHECK:               omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
+! CHECK:                 %[[IDX:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 hlfir.assign %[[ARG1]] to %[[IDX]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.cancel cancellation_construct_type(taskgroup)
+! CHECK:                 omp.yield
+! CHECK:               }
 ! CHECK:             }
+! CHECK:             omp.terminator
 ! CHECK:           }
 ! CHECK:           omp.terminator
 ! CHECK:         }

diff  --git a/flang/test/Lower/OpenMP/taskloop-collapse.f90 b/flang/test/Lower/OpenMP/taskloop-collapse.f90
index 48243640d07b9..288d06c7e5a37 100644
--- a/flang/test/Lower/OpenMP/taskloop-collapse.f90
+++ b/flang/test/Lower/OpenMP/taskloop-collapse.f90
@@ -21,7 +21,8 @@ subroutine test()
     integer :: i, j, sum
 
     !$omp taskloop collapse(2)
-    ! CHECK-LABEL: omp.taskloop
+    ! CHECK:      omp.taskloop.context
+    ! CHECK:      omp.taskloop
     ! CHECK-SAME: private(@_QFtestEsum_firstprivate_i32 %[[DECLARE_SUM]]#0 -> %arg0, @_QFtestEi_private_i32 %[[DECLARE_I]]#0 -> %arg1, @_QFtestEj_private_i32 %[[DECLARE_J]]#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>)
     ! CHECK-LABEL: omp.loop_nest
     ! CHECK-SAME: (%arg3, %arg4) : i32 = (%c1_i32, %c1_i32_1) to (%c10_i32, %c5_i32) inclusive step (%c1_i32_0, %c1_i32_2) collapse(2)

diff  --git a/flang/test/Lower/OpenMP/taskloop-grainsize.f90 b/flang/test/Lower/OpenMP/taskloop-grainsize.f90
index 8aee5f69c849f..eaad0b08f1a8e 100644
--- a/flang/test/Lower/OpenMP/taskloop-grainsize.f90
+++ b/flang/test/Lower/OpenMP/taskloop-grainsize.f90
@@ -27,6 +27,7 @@
 ! CHECK:          %[[GRAINSIZE:.*]] = arith.constant 10 : i32
 subroutine test_grainsize
    integer :: i, x
+   ! CHECK:        omp.taskloop.context {
    ! CHECK:          omp.taskloop grainsize(%[[GRAINSIZE]]: i32)
    ! CHECK-SAME:        private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
    ! CHECK:            omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
@@ -41,7 +42,8 @@ end subroutine test_grainsize
 subroutine test_grainsize_strict
   integer :: i, x
   ! CHECK: %[[GRAINSIZE:.*]] = arith.constant 10 : i32
-  ! CHECK: omp.taskloop grainsize(strict, %[[GRAINSIZE]]: i32)
+  ! CHECK: omp.taskloop.context {
+  ! CHECK:   omp.taskloop grainsize(strict, %[[GRAINSIZE]]: i32)
   !$omp taskloop grainsize(strict:10)
   do i = 1, 1000
      !CHECK: arith.addi

diff  --git a/flang/test/Lower/taskloop-inreduction.f90 b/flang/test/Lower/OpenMP/taskloop-inreduction.f90
similarity index 98%
rename from flang/test/Lower/taskloop-inreduction.f90
rename to flang/test/Lower/OpenMP/taskloop-inreduction.f90
index e7d3f96115fbd..a442c497e60be 100644
--- a/flang/test/Lower/taskloop-inreduction.f90
+++ b/flang/test/Lower/OpenMP/taskloop-inreduction.f90
@@ -25,6 +25,7 @@
 subroutine omp_taskloop_inreduction()
    integer x
    x = 0
+   ! CHECK:        omp.taskloop.context {
    ! CHECK:        omp.taskloop in_reduction(@[[ADD_RED_I32]] 
    ! CHECK:        %[[DECL_X]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) private(@[[PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
    ! CHECK:        %[[VAL_ARG1:.*]]:2 = hlfir.declare %[[ARG0]] 

diff  --git a/flang/test/Lower/OpenMP/taskloop-numtasks.f90 b/flang/test/Lower/OpenMP/taskloop-numtasks.f90
index e5b7a49748c51..668bde7835c72 100644
--- a/flang/test/Lower/OpenMP/taskloop-numtasks.f90
+++ b/flang/test/Lower/OpenMP/taskloop-numtasks.f90
@@ -27,6 +27,7 @@
 ! CHECK:          %[[VAL_NUMTASKS:.*]] = arith.constant 10 : i32
 subroutine test_num_tasks
    integer :: i, x
+   ! CHECK:        omp.taskloop.context {
    ! CHECK:          omp.taskloop num_tasks(%[[VAL_NUMTASKS]]: i32)
    ! CHECK-SAME:        private(@[[X_FIRSTPRIVATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
    ! CHECK:            omp.loop_nest (%[[ARG2:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
@@ -41,7 +42,8 @@ end subroutine test_num_tasks
 subroutine test_num_tasks_strict
   integer :: x, i
   ! CHECK:  %[[NUM_TASKS:.*]] = arith.constant 10 : i32
-  ! CHECK: omp.taskloop num_tasks(strict, %[[NUM_TASKS]]: i32)
+  ! CHECK: omp.taskloop.context {
+  ! CHECK:   omp.taskloop num_tasks(strict, %[[NUM_TASKS]]: i32)
   !$omp taskloop num_tasks(strict:10)
   do i = 1, 100
      !CHECK: arith.addi

diff  --git a/flang/test/Lower/taskloop-reduction.f90 b/flang/test/Lower/OpenMP/taskloop-reduction.f90
similarity index 98%
rename from flang/test/Lower/taskloop-reduction.f90
rename to flang/test/Lower/OpenMP/taskloop-reduction.f90
index e45c0181bcc8b..6c7e74f051899 100644
--- a/flang/test/Lower/taskloop-reduction.f90
+++ b/flang/test/Lower/OpenMP/taskloop-reduction.f90
@@ -25,6 +25,7 @@
 subroutine omp_taskloop_reduction()
    integer x
    x = 0
+   ! CHECK:       omp.taskloop.context {
    ! CHECK:       omp.taskloop private(@[[PRIVATE_I]] 
    ! CHECK-SAME:  %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) reduction(@[[ADD_RED_I32]] %[[DECL_X]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
    ! CHECK:       %[[VAL_ARG1:.*]]:2 = hlfir.declare %[[ARG1]] 

diff  --git a/flang/test/Lower/OpenMP/taskloop.f90 b/flang/test/Lower/OpenMP/taskloop.f90
index bfe4fe7002811..e379646703e99 100644
--- a/flang/test/Lower/OpenMP/taskloop.f90
+++ b/flang/test/Lower/OpenMP/taskloop.f90
@@ -54,17 +54,20 @@
 ! CHECK:          %[[C1_I32:.*]] = arith.constant 1 : i32
 ! CHECK:          %[[C10_I32:.*]] = arith.constant 10 : i32
 ! CHECK:          %[[C1_I32_0:.*]] = arith.constant 1 : i32
-! CHECK:          omp.taskloop private(@[[RES_FIRSTPRIVATE]] %[[RES_VAL]]#0 -> %[[PRIV_RES:.*]], @[[I_PRIVATE]] %[[I_VAL]]#0 -> %[[PRIV_I:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
-! CHECK:            omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
-! CHECK:              %[[RES_DECL:.*]]:2 = hlfir.declare %[[PRIV_RES]] {uniq_name = "_QFomp_taskloopEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:              %[[I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFomp_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK:              hlfir.assign %[[ARG2]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK:              %[[LOAD_RES:.*]] = fir.load %[[RES_DECL]]#0 : !fir.ref<i32>
-! CHECK:              %[[C1_I32_1:.*]] = arith.constant 1 : i32
-! CHECK:              %[[OUT_VAL:.*]] = arith.addi %[[LOAD_RES]], %[[C1_I32_1]] : i32
-! CHECK:              hlfir.assign %[[OUT_VAL]] to %[[RES_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK:              omp.yield
+! CHECK:          omp.taskloop.context {
+! CHECK:            omp.taskloop private(@[[RES_FIRSTPRIVATE]] %[[RES_VAL]]#0 -> %[[PRIV_RES:.*]], @[[I_PRIVATE]] %[[I_VAL]]#0 -> %[[PRIV_I:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:              omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[C1_I32]]) to (%[[C10_I32]]) inclusive step (%[[C1_I32_0]]) {
+! CHECK:                %[[RES_DECL:.*]]:2 = hlfir.declare %[[PRIV_RES]] {uniq_name = "_QFomp_taskloopEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                %[[I_DECL:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFomp_taskloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                hlfir.assign %[[ARG2]] to %[[I_DECL]]#0 : i32, !fir.ref<i32>
+! CHECK:                %[[LOAD_RES:.*]] = fir.load %[[RES_DECL]]#0 : !fir.ref<i32>
+! CHECK:                %[[C1_I32_1:.*]] = arith.constant 1 : i32
+! CHECK:                %[[OUT_VAL:.*]] = arith.addi %[[LOAD_RES]], %[[C1_I32_1]] : i32
+! CHECK:                hlfir.assign %[[OUT_VAL]] to %[[RES_DECL]]#0 : i32, !fir.ref<i32>
+! CHECK:                omp.yield
+! CHECK:              }
 ! CHECK:            }
+! CHECK:            omp.terminator
 ! CHECK:          }
 ! CHECK:          return
 ! CHECK:        }
@@ -86,15 +89,16 @@ end subroutine omp_taskloop
 ! CHECK:           %[[DECL_RES:.*]]:2 = hlfir.declare %[[ALLOCA_RES]] {uniq_name = "_QFomp_taskloop_privateEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 subroutine omp_taskloop_private
   integer :: res, i
-! CHECK:           omp.taskloop private(@[[RES_PRIVATE_TEST2]] %[[DECL_RES]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE_TEST2]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
-! CHECK:             omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
-! CHECK:               %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloop_privateEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           omp.taskloop.context {
+! CHECK:             omp.taskloop private(@[[RES_PRIVATE_TEST2]] %[[DECL_RES]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE_TEST2]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:               omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
+! CHECK:                 %[[VAL1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloop_privateEres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
   !$omp taskloop private(res)
   do i = 1, 10
-! CHECK:               %[[LOAD_RES:.*]] = fir.load %[[VAL1]]#0 : !fir.ref<i32>
-! CHECK:               %[[C1_I32_1:.*]] = arith.constant 1 : i32
-! CHECK:               %[[ADD_VAL:.*]] = arith.addi %[[LOAD_RES]], %[[C1_I32_1]] : i32
-! CHECK:               hlfir.assign %[[ADD_VAL]] to %[[VAL1]]#0 : i32, !fir.ref<i32>
+! CHECK:                 %[[LOAD_RES:.*]] = fir.load %[[VAL1]]#0 : !fir.ref<i32>
+! CHECK:                 %[[C1_I32_1:.*]] = arith.constant 1 : i32
+! CHECK:                 %[[ADD_VAL:.*]] = arith.addi %[[LOAD_RES]], %[[C1_I32_1]] : i32
+! CHECK:                 hlfir.assign %[[ADD_VAL]] to %[[VAL1]]#0 : i32, !fir.ref<i32>
      res = res + 1
   end do
 ! CHECK:           return
@@ -114,6 +118,7 @@ end subroutine omp_taskloop_private
 subroutine taskloop_allocate()
    use omp_lib
    integer :: x
+   ! CHECK:         omp.taskloop.context {
    ! CHECK:           omp.taskloop allocate(%{{.*}} : i64 -> %[[DECL_X]]#0 : !fir.ref<i32>) 
    ! CHECK-SAME:      private(@[[X_PRIVATE_TEST_ALLOCATE]] %[[DECL_X]]#0 -> %[[ARG0:.*]], @[[I_PRIVATE_TEST_ALLOCATE]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
    !$omp taskloop allocate(omp_high_bw_mem_alloc: x) private(x)
@@ -133,7 +138,8 @@ end subroutine taskloop_allocate
 ! CHECK:           %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtaskloop_finalEi"}
 ! CHECK:           %[[DECL_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFtaskloop_finalEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 subroutine taskloop_final()
-    ! CHECK:  omp.taskloop final(%true) private(@[[I_PRIVATE_FINAL]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
+    ! CHECK:  omp.taskloop.context {
+    ! CHECK:    omp.taskloop final(%true) private(@[[I_PRIVATE_FINAL]] %[[DECL_I]]#0 -> %[[ARG0:.*]] : !fir.ref<i32>) {
    !$omp taskloop final(.true.)
    do i = 1, 100
       ! CHECK: fir.call @_QPfoo()
@@ -154,7 +160,8 @@ subroutine taskloop_final()
 ! CHECK:           %[[VAL_BAR:.*]] = fir.convert %[[LOAD_VAL]] : (!fir.logical<4>) -> i1
 subroutine omp_taskloop_if(bar)
    logical, intent(inout) :: bar
-   !CHECK: omp.taskloop if(%[[VAL_BAR]]) private(@[[I_PRIVATE_IF_TEST1]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
+   !CHECK: omp.taskloop.context {
+   !CHECK:   omp.taskloop if(%[[VAL_BAR]]) private(@[[I_PRIVATE_IF_TEST1]] %[[DECL_I]]#0 -> %[[ARG1:.*]] : !fir.ref<i32>) {
    !$omp taskloop if(bar)
    do i = 1, 10
       call foo()
@@ -168,7 +175,8 @@ end subroutine omp_taskloop_if
 
 ! CHECK-LABEL:  func.func @_QPtest_mergeable
 subroutine test_mergeable
-  ! CHECK: omp.taskloop mergeable
+  ! CHECK: omp.taskloop.context {
+  ! CHECK:   omp.taskloop mergeable
   !$omp taskloop mergeable
   do i = 1, 10
   end do
@@ -184,7 +192,8 @@ end subroutine test_mergeable
 ! CHECK:          %[[LOAD_VAL:.*]] = fir.load %[[VAL1]]#0 : !fir.ref<i32>
 subroutine test_priority(n)
    integer, intent(inout) :: n
-   ! CHECK:  omp.taskloop priority(%[[LOAD_VAL]] : i32)
+   ! CHECK:  omp.taskloop.context {
+   ! CHECK:    omp.taskloop priority(%[[LOAD_VAL]] : i32)
    !$omp taskloop priority(n)
    do i = 1, 10
    end do
@@ -197,7 +206,8 @@ end subroutine test_priority
 
 ! CHECK-LABEL:  func.func @_QPomp_taskloop_untied
 subroutine omp_taskloop_untied()
-  ! CHECK: omp.taskloop untied
+  ! CHECK: omp.taskloop.context {
+  ! CHECK:   omp.taskloop untied
   !$omp taskloop untied
   do i = 1, 10
     call foo()
@@ -210,7 +220,8 @@ subroutine omp_taskloop_untied()
 !===============================================================================
 
 subroutine omp_taskloop_nogroup()
-  ! CHECK: omp.taskloop nogroup
+  ! CHECK: omp.taskloop.context {
+  ! CHECK:   omp.taskloop nogroup
   !$omp taskloop nogroup
   do i = 1, 10
     call foo()
@@ -230,7 +241,8 @@ subroutine omp_taskloop_nogroup()
 subroutine omp_taskloop_lastprivate()
    integer x
    x = 0
-   ! CHECK:  omp.taskloop private(@[[LAST_PRIVATE_X]] %[[DECL_X]]#0 -> %[[ARG0]], @[[LAST_PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1]] : !fir.ref<i32>, !fir.ref<i32>) {
+   ! CHECK:  omp.taskloop.context {
+   ! CHECK:    omp.taskloop private(@[[LAST_PRIVATE_X]] %[[DECL_X]]#0 -> %[[ARG0]], @[[LAST_PRIVATE_I]] %[[DECL_I]]#0 -> %[[ARG1]] : !fir.ref<i32>, !fir.ref<i32>) {
    !$omp taskloop lastprivate(x)
    do i = 1, 100
       ! CHECK: %[[DECL_ARG0:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_taskloop_lastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
@@ -245,5 +257,6 @@ subroutine omp_taskloop_lastprivate()
       ! CHECK:  }
       ! CHECK:  omp.yield
    end do
+   ! CHECK:  omp.terminator
    !$omp end taskloop
 end subroutine omp_taskloop_lastprivate

diff  --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 6a2fd44841572..054a254588e60 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -972,10 +972,57 @@ def TaskOp
   let hasVerifier = 1;
 }
 
+// SingleBlock restriction is just to ease implementation. It would be okay to
+// relax this.
+def TaskloopContextOp : OpenMP_Op<"taskloop.context", traits = [
+    AutomaticAllocationScope, RecursiveMemoryEffects, SingleBlock,
+    DeclareOpInterfaceMethods<OutlineableOpenMPOpInterface>
+  ], clauses = [ /* TODO */], singleRegion = true> {
+  let summary = "OutlineableOpenMPOpInterface wrapper for taskloop construct";
+  let description = [{
+    The taskloop construct specifies that the iterations of one or more
+    associated loops will be executed in parallel using explicit tasks. The
+    iterations are distributed across tasks generated by the construct and
+    scheduled to be executed. The representation of this construct is split
+    between omp.taskloop.context and omp.taskloop.
+
+    The taskloop construct must be a loop wrapper to support composite
+    constructs such as taskloop simd. Loop wrappers do not allow intervening
+    operations between the wrapper and wrapped loop(wrapper). But unlike other
+    loop wrappers, the body of taskloop is also outlined. This outlining means
+    that we need a way to represent the correct location of allocas for
+    temporaries created inside of the loop body. In order to achieve this,
+    the outlining part is represented in this operation: `omp.taskloop.context`,
+    and the loop wrapping is represented in `omp.taskloop`. For example:
+    ```
+    omp.taskloop.context <clauses> {
+      // task-local stack allocations can go here
+      omp.taskloop <clauses> {
+        omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+          %a = load %arrA[%i1, %i2] : memref<?x?xf32>
+          %b = load %arrB[%i1, %i2] : memref<?x?xf32>
+          %sum = arith.addf %a, %b : f32
+          store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
+          omp.yield
+        }
+      }
+      omp.terminator
+    }
+    ```
+  }] # clausesDescription;
+
+  let assemblyFormat = "$region attr-dict";
+
+  let extraClassDeclaration = [{
+    TaskloopOp getLoopOp();
+  }] # clausesExtraClassDeclaration;
+
+  let hasRegionVerifier = 1;
+}
+
 def TaskloopOp : OpenMP_Op<"taskloop", traits = [
-    AttrSizedOperandSegments, AutomaticAllocationScope,
+    AttrSizedOperandSegments,
     DeclareOpInterfaceMethods<ComposableOpInterface>,
-    DeclareOpInterfaceMethods<OutlineableOpenMPOpInterface>,
     DeclareOpInterfaceMethods<LoopWrapperInterface>, NoTerminator,
     RecursiveMemoryEffects, SingleBlock
   ], clauses = [
@@ -989,21 +1036,29 @@ def TaskloopOp : OpenMP_Op<"taskloop", traits = [
     The taskloop construct specifies that the iterations of one or more
     associated loops will be executed in parallel using explicit tasks. The
     iterations are distributed across tasks generated by the construct and
-    scheduled to be executed.
+    scheduled to be executed. The representation of this construct is split
+    between omp.taskloop.context and omp.taskloop.
+
+    This operation is intended to act as the loop wrapper portion of the
+    taskloop op definition. See the description of omp.taskloop.context for more
+    details.
 
     The body region can only contain a single block which must contain a single
     operation. This operation must be another compatible loop wrapper or an
     `omp.loop_nest`.
 
     ```
-    omp.taskloop <clauses> {
-      omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
-        %a = load %arrA[%i1, %i2] : memref<?x?xf32>
-        %b = load %arrB[%i1, %i2] : memref<?x?xf32>
-        %sum = arith.addf %a, %b : f32
-        store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
-        omp.yield
+    omp.taskloop.context <clauses> {
+      omp.taskloop <clauses> {
+        omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+          %a = load %arrA[%i1, %i2] : memref<?x?xf32>
+          %b = load %arrB[%i1, %i2] : memref<?x?xf32>
+          %sum = arith.addf %a, %b : f32
+          store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
+          omp.yield
+        }
       }
+      omp.terminator
     }
     ```
 
@@ -1049,7 +1104,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", traits = [
   }];
 
   let extraClassDeclaration = [{
-    void getEffects(SmallVectorImpl<MemoryEffects::EffectInstance> &effects);
+    TaskloopContextOp getTaskloopContext();
   }] # clausesExtraClassDeclaration;
 
   let hasVerifier = 1;

diff  --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 82fbc909f5275..5aec66af06d9a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3359,6 +3359,31 @@ LogicalResult TaskgroupOp::verify() {
                                 getTaskReductionByref());
 }
 
+//===----------------------------------------------------------------------===//
+// TaskloopContextOp
+//===----------------------------------------------------------------------===//
+
+TaskloopOp TaskloopContextOp::getLoopOp() {
+  // verifyRegions() guarantees exactly one TaskloopOp directly nested in the
+  // first block of the region, so taking the first match is always valid.
+  return *getRegion().front().getOps<TaskloopOp>().begin();
+}
+
+LogicalResult TaskloopContextOp::verifyRegions() {
+  // A body without any block cannot hold the required nested taskloop.
+  if (getRegion().empty())
+    return emitOpError() << "expected non-empty region";
+  // Count only the taskloop ops placed directly in the first block.
+  int64_t numLoops = 0;
+  for (Operation &nested : getRegion().front())
+    numLoops += isa<TaskloopOp>(nested) ? 1 : 0;
+  if (numLoops == 1)
+    return success();
+  return emitOpError() << "expected exactly 1 TaskloopOp directly nested in "
+                          "the region, but "
+                       << numLoops << " were found";
+}
+
 //===----------------------------------------------------------------------===//
 // TaskloopOp
 //===----------------------------------------------------------------------===//
@@ -3380,7 +3405,15 @@ void TaskloopOp::build(OpBuilder &builder, OperationState &state,
       makeArrayAttr(ctx, clauses.reductionSyms), clauses.untied);
 }
 
+TaskloopContextOp TaskloopOp::getTaskloopContext() {
+  return dyn_cast_if_present<TaskloopContextOp>(getOperation()->getParentOp());
+}
+
 LogicalResult TaskloopOp::verify() {
+  TaskloopContextOp context = getTaskloopContext();
+  if (!context)
+    return emitOpError() << "expected to be nested in a taskloop context op";
+
   if (getAllocateVars().size() != getAllocatorVars().size())
     return emitError(
         "expected equal sizes for allocate and allocator variables");

diff  --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 74db1d8090a8e..2c07d2e9eff30 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -421,6 +421,9 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkDepend(op, result);
         checkNowait(op, result);
       })
+      .Case([&](omp::TaskloopContextOp op) {
+        // TODO: move clauses from TaskloopOp to here
+      })
       .Case([&](omp::TaskloopOp op) {
         checkAllocate(op, result);
         checkInReduction(op, result);
@@ -2852,12 +2855,35 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   return success();
 }
 
-// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
+/// The correct entry point is convertOmpTaskloopContextOp. This gets called
+/// whilst lowering the body of the taskloop context (i.e. the task function).
 static LogicalResult
-convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
+convertOmpTaskloopOp(omp::TaskloopOp taskloopOp, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
+  mlir::Operation &opInst = *taskloopOp.getOperation();
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  // Recurse into the loop body.
+  auto continuationBlockOrError =
+      convertOmpOpRegions(taskloopOp.getRegion(), "omp.taskloop.region",
+                          builder, moduleTranslation);
+
+  if (failed(handleError(continuationBlockOrError, opInst)))
+    return failure();
+
+  builder.SetInsertPoint(continuationBlockOrError.get());
+  return success();
+}
+
+// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
+static LogicalResult
+convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
+                            llvm::IRBuilderBase &builder,
+                            LLVM::ModuleTranslation &moduleTranslation) {
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  auto taskloopOp = cast<omp::TaskloopOp>(opInst);
+  mlir::Operation &opInst = *contextOp.getOperation();
+  omp::TaskloopOp taskloopOp = contextOp.getLoopOp();
   if (failed(checkImplementationStatus(opInst)))
     return failure();
 
@@ -3004,9 +3030,11 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
       moduleTranslation.mapValue(blockArg, llvmPrivateVar);
     }
 
-    auto continuationBlockOrError =
-        convertOmpOpRegions(taskloopOp.getRegion(), "omp.taskloop.region",
-                            builder, moduleTranslation);
+    // Lower the contents of the taskloop context region: this is the body of
+    // the generated task, not the loop.
+    auto continuationBlockOrError = convertOmpOpRegions(
+        contextOp.getRegion(), "omp.taskloop.context.region", builder,
+        moduleTranslation);
 
     if (failed(handleError(continuationBlockOrError, opInst)))
       return llvm::make_error<PreviouslyReportedError>();
@@ -3094,9 +3122,10 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
       // through a stack allocated structure.
     }
 
-    if (failed(copyFirstPrivateVars(
-            &opInst, builder, moduleTranslation, srcGEPs, destGEPs,
-            privateVarsInfo.privatizers, taskloopOp.getPrivateNeedsBarrier())))
+    if (failed(copyFirstPrivateVars(taskloopOp.getOperation(), builder,
+                                    moduleTranslation, srcGEPs, destGEPs,
+                                    privateVarsInfo.privatizers,
+                                    taskloopOp.getPrivateNeedsBarrier())))
       return llvm::make_error<PreviouslyReportedError>();
 
     return builder.saveIP();
@@ -7505,6 +7534,18 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
       isa_and_present<omp::LoopWrapperInterface>(op) &&
       !dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
 
+  // The TASKLOOP construct is implemented with an outer taskloop.context
+  // operation which is not a loop wrapper, containing an inner taskloop
+  // operation which is a loop wrapper. The loop information stack frame must
+  // be accessible while translating the outer taskloop.context, so the frame
+  // is managed around the taskloop.context operation rather than around the
+  // inner taskloop loop wrapper: treat the context as the outermost loop
+  // wrapper and never treat the inner taskloop as one.
+  if (isa<omp::TaskloopContextOp>(op))
+    isOutermostLoopWrapper = true;
+  else if (isa<omp::TaskloopOp>(op))
+    isOutermostLoopWrapper = false;
+
   if (isOutermostLoopWrapper)
     moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
 
@@ -7602,7 +7643,10 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
             return convertOmpTaskOp(op, builder, moduleTranslation);
           })
           .Case([&](omp::TaskloopOp op) {
-            return convertOmpTaskloopOp(*op, builder, moduleTranslation);
+            return convertOmpTaskloopOp(op, builder, moduleTranslation);
+          })
+          .Case([&](omp::TaskloopContextOp op) {
+            return convertOmpTaskloopContextOp(op, builder, moduleTranslation);
           })
           .Case([&](omp::TaskgroupOp op) {
             return convertOmpTaskgroupOp(op, builder, moduleTranslation);

diff  --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index b122f425f0752..0eeb2f8e2bd6d 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -588,15 +588,19 @@ func.func @omp_ordered(%arg0 : index) -> () {
 func.func @omp_taskloop(%arg0: index, %arg1 : memref<i32>) {
   // CHECK: omp.parallel {
   omp.parallel {
-    // CHECK: omp.taskloop allocate(%{{.*}} : !llvm.struct<(ptr, ptr, i64)> -> %{{.*}} : !llvm.struct<(ptr, ptr, i64)>) {
-    omp.taskloop allocate(%arg1 : memref<i32> -> %arg1 : memref<i32>) {
-      // CHECK: omp.loop_nest (%[[IV:.*]]) : i64 = (%[[ARG0]]) to (%[[ARG0]]) step (%[[ARG0]]) {
-      omp.loop_nest (%iv) : index = (%arg0) to (%arg0) step (%arg0) {
-        // CHECK-DAG: %[[CAST_IV:.*]] = builtin.unrealized_conversion_cast %[[IV]] : i64 to index
-        // CHECK: "test.payload"(%[[CAST_IV]]) : (index) -> ()
-        "test.payload"(%iv) : (index) -> ()
-        omp.yield
+    // CHECK: omp.taskloop.context {
+    omp.taskloop.context {
+      // CHECK: omp.taskloop allocate(%{{.*}} : !llvm.struct<(ptr, ptr, i64)> -> %{{.*}} : !llvm.struct<(ptr, ptr, i64)>) {
+      omp.taskloop allocate(%arg1 : memref<i32> -> %arg1 : memref<i32>) {
+        // CHECK: omp.loop_nest (%[[IV:.*]]) : i64 = (%[[ARG0]]) to (%[[ARG0]]) step (%[[ARG0]]) {
+        omp.loop_nest (%iv) : index = (%arg0) to (%arg0) step (%arg0) {
+          // CHECK-DAG: %[[CAST_IV:.*]] = builtin.unrealized_conversion_cast %[[IV]] : i64 to index
+          // CHECK: "test.payload"(%[[CAST_IV]]) : (index) -> ()
+          "test.payload"(%iv) : (index) -> ()
+          omp.yield
+        }
       }
+    omp.terminator
     }
     omp.terminator
   }

diff  --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index db5d1b60c5697..257cc0976bd1f 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2050,12 +2050,15 @@ combiner {
 
 func.func @scan_test_2(%lb: i32, %ub: i32, %step: i32) {
   %test1f32 = "test.f32"() : () -> (!llvm.ptr)
-  omp.taskloop reduction(mod:inscan, @add_f32 %test1f32 -> %arg1 : !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+  omp.taskloop.context {
+    omp.taskloop reduction(mod:inscan, @add_f32 %test1f32 -> %arg1 : !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
   // expected-error @below {{SCAN directive needs to be enclosed within a parent worksharing loop construct or SIMD construct with INSCAN reduction modifier}}
-       omp.scan inclusive(%test1f32 : !llvm.ptr)
-        omp.yield
+         omp.scan inclusive(%test1f32 : !llvm.ptr)
+          omp.yield
+      }
     }
+    omp.terminator
   }
   return
 }
@@ -2064,12 +2067,15 @@ func.func @scan_test_2(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testmemref = "test.memref"() : () -> (memref<i32>)
-  // expected-error @below {{expected equal sizes for allocate and allocator variables}}
-  "omp.taskloop"(%testmemref) ({
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
-    }
-  }) {operandSegmentSizes = array<i32: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0>} : (memref<i32>) -> ()
+  omp.taskloop.context {
+    // expected-error @below {{expected equal sizes for allocate and allocator variables}}
+    "omp.taskloop"(%testmemref) ({
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
+    }) {operandSegmentSizes = array<i32: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0>} : (memref<i32>) -> ()
+    omp.terminator
+  }
   return
 }
 
@@ -2078,13 +2084,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-  "omp.taskloop"(%testf32, %testf32_2) ({
-  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
-    }
-  }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 2>, reduction_syms = [@add_f32]} : (!llvm.ptr, !llvm.ptr) -> ()
+  omp.taskloop.context {
+    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+    "omp.taskloop"(%testf32, %testf32_2) ({
+    ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
+    }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 2>, reduction_syms = [@add_f32]} : (!llvm.ptr, !llvm.ptr) -> ()
+    omp.terminator
+  }
   return
 }
 
@@ -2092,13 +2101,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
-  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-  "omp.taskloop"(%testf32) ({
-  ^bb0(%arg0: !llvm.ptr):
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
-    }
-  }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>, reduction_syms = [@add_f32, @add_f32]} : (!llvm.ptr) -> ()
+  omp.taskloop.context {
+    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+    "omp.taskloop"(%testf32) ({
+    ^bb0(%arg0: !llvm.ptr):
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
+    }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 1>, reduction_syms = [@add_f32, @add_f32]} : (!llvm.ptr) -> ()
+    omp.terminator
+  }
   return
 }
 
@@ -2107,13 +2119,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-  "omp.taskloop"(%testf32, %testf32_2) ({
-  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
-    }
-  }) {in_reduction_syms = [@add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 2, 0, 0, 0, 0>} : (!llvm.ptr, !llvm.ptr) -> ()
+  omp.taskloop.context {
+    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+    "omp.taskloop"(%testf32, %testf32_2) ({
+    ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
+    }) {in_reduction_syms = [@add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 2, 0, 0, 0, 0>} : (!llvm.ptr, !llvm.ptr) -> ()
+    omp.terminator
+  }
   return
 }
 
@@ -2121,13 +2136,16 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
-  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
-  "omp.taskloop"(%testf32) ({
-  ^bb0(%arg0: !llvm.ptr):
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
-    }
-  }) {in_reduction_syms = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 1, 0, 0, 0, 0>} : (!llvm.ptr) -> ()
+  omp.taskloop.context {
+    // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+    "omp.taskloop"(%testf32) ({
+    ^bb0(%arg0: !llvm.ptr):
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
+    }) {in_reduction_syms = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 1, 0, 0, 0, 0>} : (!llvm.ptr) -> ()
+    omp.terminator
+  }
   return
 }
 
@@ -2148,11 +2166,14 @@ combiner {
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
-  omp.taskloop nogroup reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
+  omp.taskloop.context {
+    // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
+    omp.taskloop nogroup reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   return
 }
@@ -2173,11 +2194,14 @@ combiner {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
-  // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
-  omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32 -> %arg1 : !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
+  omp.taskloop.context {
+    // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
+    omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32 -> %arg1 : !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   return
 }
@@ -2186,11 +2210,14 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testi64 = "test.i64"() : () -> (i64)
-  // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
-  omp.taskloop grainsize(%testi64: i64) num_tasks(%testi64: i64) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
+  omp.taskloop.context {
+    // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
+    omp.taskloop grainsize(%testi64: i64) num_tasks(%testi64: i64) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   return
 }
@@ -2199,11 +2226,14 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testi64 = "test.i64"() : () -> (i64)
-  // expected-error @below {{invalid grainsize modifier : 'strict1'}}
-  omp.taskloop grainsize(strict1, %testi64: i64) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
+  omp.taskloop.context {
+    // expected-error @below {{invalid grainsize modifier : 'strict1'}}
+    omp.taskloop grainsize(strict1, %testi64: i64) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   return
 }
@@ -2211,20 +2241,26 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
   %testi64 = "test.i64"() : () -> (i64)
-  // expected-error @below {{invalid num_tasks modifier : 'default'}}
-  omp.taskloop num_tasks(default, %testi64: i64) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      omp.yield
+  omp.taskloop.context {
+    // expected-error @below {{invalid num_tasks modifier : 'default'}}
+    omp.taskloop num_tasks(default, %testi64: i64) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   return
 }
 // -----
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
-  // expected-error @below {{op nested in loop wrapper is not another loop wrapper or `omp.loop_nest`}}
-  omp.taskloop {
-    %0 = arith.constant 0 : i32
+  omp.taskloop.context {
+    // expected-error @below {{op nested in loop wrapper is not another loop wrapper or `omp.loop_nest`}}
+    omp.taskloop {
+      %0 = arith.constant 0 : i32
+    }
+    omp.terminator
   }
   return
 }
@@ -2232,14 +2268,17 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
 // -----
 
 func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
-  // expected-error @below {{only supported nested wrapper is 'omp.simd'}}
-  omp.taskloop {
-    omp.distribute {
-      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-        omp.yield
+  omp.taskloop.context {
+    // expected-error @below {{only supported nested wrapper is 'omp.simd'}}
+    omp.taskloop {
+      omp.distribute {
+        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+          omp.yield
+        }
       }
-    }
-  } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  }
   return
 }
 
@@ -2539,11 +2578,11 @@ func.func @omp_distribute_nested_wrapper(%lb: index, %ub: index, %step: index) -
 func.func @omp_distribute_nested_wrapper2(%lb: index, %ub: index, %step: index) -> () {
   // expected-error @below {{only supported nested wrappers are 'omp.simd' and 'omp.wsloop'}}
   omp.distribute {
-    "omp.taskloop"() ({
+    omp.distribute {
       omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
         "omp.yield"() : () -> ()
       }
-    }) : () -> ()
+    }
   } {omp.composite}
 }
 
@@ -2930,26 +2969,69 @@ func.func @omp_distribute_invalid_composite(%lb: index, %ub: index, %step: index
 }
 
 // -----
-func.func @omp_taskloop_missing_composite(%lb: index, %ub: index, %step: index) -> () {
-  // expected-error @below {{'omp.composite' attribute missing from composite wrapper}}
+func.func @omp_taskloop_missing_loop() -> () {
+  // expected-error @below {{'omp.taskloop.context' op expected exactly 1 TaskloopOp directly nested in the region, but 0 were found}}
+  omp.taskloop.context {
+    omp.terminator
+  }
+  return
+}
+
+// -----
+func.func @omp_taskloop_missing_context(%lb: index, %ub: index, %step: index) -> () {
+  // expected-error @below {{'omp.taskloop' op expected to be nested in a taskloop context op}}
   omp.taskloop {
-    omp.simd {
+    omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step)  {
+      omp.yield
+    }
+  }
+  return
+}
+
+// -----
+func.func @omp_taskloop_shared_context(%lb: index, %ub: index, %step: index) -> () {
+  // expected-error @below {{'omp.taskloop.context' op expected exactly 1 TaskloopOp directly nested in the region, but 2 were found}}
+  omp.taskloop.context {
+    omp.taskloop {
       omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step)  {
         omp.yield
       }
-    } {omp.composite}
+    }
+    omp.taskloop {
+      omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step)  {
+        omp.yield
+      }
+    }
+    omp.terminator
   }
   return
 }
 
 // -----
-func.func @omp_taskloop_invalid_composite(%lb: index, %ub: index, %step: index) -> () {
-  // expected-error @below {{'omp.composite' attribute present in non-composite wrapper}}
-  omp.taskloop {
-    omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step)  {
-      omp.yield
+func.func @omp_taskloop_missing_composite(%lb: index, %ub: index, %step: index) -> () {
+  omp.taskloop.context {
+    // expected-error @below {{'omp.composite' attribute missing from composite wrapper}}
+    omp.taskloop {
+      omp.simd {
+        omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step)  {
+          omp.yield
+        }
+      } {omp.composite}
     }
-  } {omp.composite}
+  }
+  return
+}
+
+// -----
+func.func @omp_taskloop_invalid_composite(%lb: index, %ub: index, %step: index) -> () {
+  omp.taskloop.context {
+    // expected-error @below {{'omp.composite' attribute present in non-composite wrapper}}
+    omp.taskloop {
+      omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step)  {
+        omp.yield
+      }
+    } {omp.composite}
+  }
   return
 }
 

diff  --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index b0554eba459f8..dc79ef2fb5484 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2389,13 +2389,16 @@ func.func @omp_cancel_taskgroup() -> () {
 }
 
 func.func @omp_taskloop_cancel_taskgroup(%lb : index, %ub : index, %step : index) {
-  omp.taskloop {
-    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
-      // CHECK: omp.cancel cancellation_construct_type(taskgroup)
-      omp.cancel cancellation_construct_type(taskgroup)
-      // CHECK: omp.yield
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        // CHECK: omp.cancel cancellation_construct_type(taskgroup)
+        omp.cancel cancellation_construct_type(taskgroup)
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
   return
 }
@@ -2631,160 +2634,232 @@ func.func @omp_taskgroup_clauses() -> () {
 // CHECK-LABEL: @omp_taskloop
 func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
 
-  // CHECK: omp.taskloop {
-  omp.taskloop {
-    omp.loop_nest (%i) : i32 = (%lb) to (%ub) step (%step)  {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
+      omp.loop_nest (%i) : i32 = (%lb) to (%ub) step (%step)  {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
   %testbool = "test.bool"() : () -> (i1)
 
-  // CHECK: omp.taskloop if(%{{[^)]+}}) {
-  omp.taskloop if(%testbool) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop if(%{{[^)]+}}) {
+    omp.taskloop if(%testbool) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop final(%{{[^)]+}}) {
-  omp.taskloop final(%testbool) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop final(%{{[^)]+}}) {
+    omp.taskloop final(%testbool) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop untied {
-  omp.taskloop untied {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop untied {
+    omp.taskloop untied {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop mergeable {
-  omp.taskloop mergeable {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop mergeable {
+    omp.taskloop mergeable {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
   %testf32 = "test.f32"() : () -> (!llvm.ptr)
   %testf32_2 = "test.f32"() : () -> (!llvm.ptr)
-  // CHECK: omp.taskloop in_reduction(@add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-  omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop in_reduction(@add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
   // Checking byref attribute for in_reduction
-  // CHECK: omp.taskloop in_reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-  omp.taskloop in_reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop in_reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop in_reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-  omp.taskloop reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop reduction(byref @add_f32 %{{.+}} -> %{{.+}}, @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop reduction(byref @add_f32 %testf32 -> %arg0, @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
   // check byref attrbute for reduction
-  // CHECK: omp.taskloop reduction(byref @add_f32 %{{.+}} -> %{{.+}}, byref @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
-  omp.taskloop reduction(byref @add_f32 %testf32 -> %arg0, byref @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop reduction(byref @add_f32 %{{.+}} -> %{{.+}}, byref @add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr, !llvm.ptr) {
+    omp.taskloop reduction(byref @add_f32 %testf32 -> %arg0, byref @add_f32 %testf32_2 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop in_reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) {
-  omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32_2 -> %arg1 : !llvm.ptr) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop in_reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) reduction(@add_f32 %{{.+}} -> %{{.+}} : !llvm.ptr) {
+    omp.taskloop in_reduction(@add_f32 %testf32 -> %arg0 : !llvm.ptr) reduction(@add_f32 %testf32_2 -> %arg1 : !llvm.ptr) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
   %testi32 = "test.i32"() : () -> (i32)
-  // CHECK: omp.taskloop priority(%{{[^:]+}}: i32) {
-  omp.taskloop priority(%testi32 : i32) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop priority(%{{[^:]+}}: i32) {
+    omp.taskloop priority(%testi32 : i32) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
   %testmemref = "test.memref"() : () -> (memref<i32>)
-  // CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>) {
-  omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>) {
+    omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
   %testi64 = "test.i64"() : () -> (i64)
-  // CHECK: omp.taskloop grainsize(%{{[^:]+}}: i64) {
-  omp.taskloop grainsize(%testi64: i64) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
-    }
-  }
-
-  // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) {
-  omp.taskloop num_tasks(%testi64: i64) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop grainsize(%{{[^:]+}}: i64) {
+    omp.taskloop grainsize(%testi64: i64) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop grainsize(strict, %{{[^:]+}}: i64) {
-  omp.taskloop grainsize(strict, %testi64: i64) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) {
+    omp.taskloop num_tasks(%testi64: i64) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop num_tasks(strict, %{{[^:]+}}: i64) {
-  omp.taskloop num_tasks(strict, %testi64: i64) {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop grainsize(strict, %{{[^:]+}}: i64) {
+    omp.taskloop grainsize(strict, %testi64: i64) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop nogroup {
-  omp.taskloop nogroup {
-    omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
-      // CHECK: omp.yield
-      omp.yield
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop num_tasks(strict, %{{[^:]+}}: i64) {
+    omp.taskloop num_tasks(strict, %testi64: i64) {
+      omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+        // CHECK: omp.yield
+        omp.yield
+      }
     }
+    omp.terminator
   }
 
-  // CHECK: omp.taskloop {
-  omp.taskloop {
-    omp.simd {
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop nogroup {
+    omp.taskloop nogroup {
       omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
         // CHECK: omp.yield
         omp.yield
       }
+    }
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop.context {
+  omp.taskloop.context {
+    // CHECK: omp.taskloop {
+    omp.taskloop {
+      omp.simd {
+        omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+          // CHECK: omp.yield
+          omp.yield
+        }
+      } {omp.composite}
     } {omp.composite}
-  } {omp.composite}
+    omp.terminator
+  }
 
   // CHECK: return
   return

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-bounds-cast.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-bounds-cast.mlir
index 41121705aa2c1..9b1a90aa69369 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-bounds-cast.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-bounds-cast.mlir
@@ -14,11 +14,14 @@ llvm.func @_QPtest_taskloop_bounds() {
   %lb = llvm.mlir.constant(1 : i32) : i32
   %ub = llvm.mlir.constant(10 : i32) : i32
   %step = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop private(@_QPtest_taskloop_boundsEi_private_i32 %1 -> %arg0 : !llvm.ptr) {
-    omp.loop_nest (%arg1) : i32 = (%lb) to (%ub) inclusive step (%step) {
-      llvm.store %arg1, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QPtest_taskloop_boundsEi_private_i32 %1 -> %arg0 : !llvm.ptr) {
+      omp.loop_nest (%arg1) : i32 = (%lb) to (%ub) inclusive step (%step) {
+        llvm.store %arg1, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
index 983cf943ea245..13d78357e8e67 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancel.mlir
@@ -21,14 +21,17 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
   %1 = llvm.mlir.constant(100 : i32) : i32
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
-      llvm.store %arg3, %arg2 : i32, !llvm.ptr
-      llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
-      omp.cancel cancellation_construct_type(taskgroup)
-      llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
+        llvm.store %arg3, %arg2 : i32, !llvm.ptr
+        llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
+        omp.cancel cancellation_construct_type(taskgroup)
+        llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -90,9 +93,11 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:       taskloop.body:                                    ; preds = %[[VAL_35:.*]]
 // CHECK:         %[[VAL_36:.*]] = getelementptr { i32 }, ptr %[[VAL_32]], i32 0, i32 0
 // CHECK:         br label %[[VAL_37:.*]]
-// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_34]]
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_34]]
+// CHECK:         br label %[[VAL_37_1:.*]]
+// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_37]]
 // CHECK:         br label %[[VAL_38:.*]]
-// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
+// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37_1]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_28]], %[[VAL_26]]
 // CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_30]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
@@ -108,8 +113,10 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_48]]
 // CHECK:         br label %[[OMP_LOOP_AFTER:omp_loop.after]]
 // CHECK:       omp_loop.after:                                   ; preds = %[[VAL_51]]
+// CHECK:         br label %[[CONT2:omp.region.cont2]]
+// CHECK:       omp.region.cont2:                                 ; preds = %[[OMP_LOOP_AFTER]]
 // CHECK:         br label %[[CONT:omp.region.cont]]
-// CHECK:       omp.region.cont:                                  ; preds = %[[FINI:.fini]], %[[OMP_LOOP_AFTER]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[FINI:.fini]], %[[CONT2]]
 // CHECK:         call void @_dealloc(ptr %[[VAL_36]])
 // CHECK:         tail call void @free(ptr %[[VAL_32]])
 // CHECK:         br label %[[VAL_55:.*]]
@@ -131,7 +138,7 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:       omp.loop_nest.region.split:                       ; preds = %[[LOOP_REGION]]
 // CHECK:         call void @_QPafter(ptr %[[VAL_36]])
 // CHECK:         br label %[[VAL_64:.*]]
-// CHECK:       omp.region.cont2:                                 ; preds = %[[VAL_62]]
+// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_62]]
 // CHECK:         br label %[[VAL_45]]
 // CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_64]]
 // CHECK:         %[[VAL_47]] = add nuw i32 %[[VAL_46]], 1
@@ -146,14 +153,17 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
   omp.taskgroup {
-    omp.taskloop nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
-      omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
-        llvm.store %arg3, %arg2 : i32, !llvm.ptr
-        llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
-        omp.cancel cancellation_construct_type(taskgroup)
-        llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
-        omp.yield
+    omp.taskloop.context {
+      omp.taskloop nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+        omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
+          llvm.store %arg3, %arg2 : i32, !llvm.ptr
+          llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
+          omp.cancel cancellation_construct_type(taskgroup)
+          llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
+          omp.yield
+        }
       }
+      omp.terminator
     }
     omp.terminator
   }
@@ -224,9 +234,11 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       taskloop.body:                                    ; preds = %[[VAL_105:.*]]
 // CHECK:         %[[VAL_106:.*]] = getelementptr { i32 }, ptr %[[VAL_102]], i32 0, i32 0
 // CHECK:         br label %[[VAL_107:.*]]
-// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_104]]
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_104]]
+// CHECK:         br label %[[VAL_107_1:.*]]
+// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_107]]
 // CHECK:         br label %[[VAL_108:.*]]
-// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107]]
+// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107_1]]
 // CHECK:         %[[VAL_109:.*]] = sub i64 %[[VAL_98]], %[[VAL_96]]
 // CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], %[[VAL_100]]
 // CHECK:         %[[VAL_111:.*]] = add i64 %[[VAL_110]], 1
@@ -242,8 +254,10 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_118]]
 // CHECK:         br label %[[VAL_122:.*]]
 // CHECK:       omp_loop.after:                                   ; preds = %[[VAL_121]]
-// CHECK:         br label %[[VAL_123:omp.region.cont2]]
-// CHECK:       omp.region.cont2:                                 ; preds = %[[VAL_124:.fini]], %[[VAL_122]]
+// CHECK:         br label %[[CONT3:omp.region.cont3]]
+// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_122]]
+// CHECK:         br label %[[CONT2:omp.region.cont2]]
+// CHECK:       omp.region.cont2:                                 ; preds = %[[VAL_124:.fini]], %[[CONT3]]
 // CHECK:         call void @_dealloc(ptr %[[VAL_106]])
 // CHECK:         tail call void @free(ptr %[[VAL_102]])
 // CHECK:         br label %[[VAL_125:.*]]
@@ -261,16 +275,16 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       omp.loop_nest.region.cncl:                        ; preds = %[[VAL_128]]
 // CHECK:         br label %[[VAL_124]]
 // CHECK:       .fini:                                            ; preds = %[[VAL_133]]
-// CHECK:         br label %[[VAL_123]]
+// CHECK:         br label %[[CONT2]]
 // CHECK:       omp.loop_nest.region.split:                       ; preds = %[[VAL_128]]
 // CHECK:         call void @_QPafter(ptr %[[VAL_106]])
 // CHECK:         br label %[[VAL_134:.*]]
-// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_132]]
+// CHECK:       omp.region.cont4:                                 ; preds = %[[VAL_132]]
 // CHECK:         br label %[[VAL_115]]
 // CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_134]]
 // CHECK:         %[[VAL_117]] = add nuw i32 %[[VAL_116]], 1
 // CHECK:         br label %[[VAL_114]]
-// CHECK:       taskloop.exit.exitStub:                           ; preds = %[[VAL_123]]
+// CHECK:       taskloop.exit.exitStub:                           ; preds = %[[CONT2]]
 // CHECK:         ret void
 
 // Test if clause
@@ -279,15 +293,18 @@ llvm.func @_QPtest3(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
   %1 = llvm.mlir.constant(100 : i32) : i32
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
-      llvm.store %arg3, %arg2 : i32, !llvm.ptr
-      llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
-      %true = llvm.mlir.constant(1 : i1) : i1
-      omp.cancel cancellation_construct_type(taskgroup) if(%true)
-      llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
+        llvm.store %arg3, %arg2 : i32, !llvm.ptr
+        llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
+        %true = llvm.mlir.constant(1 : i1) : i1
+        omp.cancel cancellation_construct_type(taskgroup) if(%true)
+        llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -349,9 +366,11 @@ llvm.func @_QPtest3(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       taskloop.body:                                    ; preds = %[[VAL_171:.*]]
 // CHECK:         %[[VAL_172:.*]] = getelementptr { i32 }, ptr %[[VAL_168]], i32 0, i32 0
 // CHECK:         br label %[[VAL_173:.*]]
-// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_170]]
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_170]]
+// CHECK:         br label %[[VAL_173_1:.*]]
+// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_173]]
 // CHECK:         br label %[[VAL_174:.*]]
-// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_173]]
+// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_173_1]]
 // CHECK:         %[[VAL_175:.*]] = sub i64 %[[VAL_164]], %[[VAL_162]]
 // CHECK:         %[[VAL_176:.*]] = sdiv i64 %[[VAL_175]], %[[VAL_166]]
 // CHECK:         %[[VAL_177:.*]] = add i64 %[[VAL_176]], 1
@@ -367,8 +386,10 @@ llvm.func @_QPtest3(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_184]]
 // CHECK:         br label %[[LOOP_AFTER:omp_loop.after]]
 // CHECK:       omp_loop.after:                                   ; preds = %[[LOOP_EXIT]]
+// CHECK:         br label %[[OMP_REGION_CONT2:omp.region.cont2]]
+// CHECK:       omp.region.cont2:                                  ; preds = %[[LOOP_AFTER]]
 // CHECK:         br label %[[OMP_REGION_CONT:omp.region.cont]]
-// CHECK:       omp.region.cont:                                  ; preds = %[[FINI:.*]], %[[LOOP_AFTER]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[FINI:.*]], %[[OMP_REGION_CONT2]]
 // CHECK:         call void @_dealloc(ptr %[[VAL_172]])
 // CHECK:         tail call void @free(ptr %[[VAL_168]])
 // CHECK:         br label %[[VAL_191:.*]]
@@ -395,7 +416,7 @@ llvm.func @_QPtest3(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       [[AFTER]]:                                        ; preds = %[[CANCEL_IF_FALSE_CONT]], %[[CANCEL_IF_TRUE_CONT:.*]]
 // CHECK:         call void @_QPafter(ptr %[[VAL_172]])
 // CHECK:         br label %[[VAL_205:.*]]
-// CHECK:       omp.region.cont2:                                 ; preds = %[[AFTER]]
+// CHECK:       omp.region.cont3:                                 ; preds = %[[AFTER]]
 // CHECK:         br label %[[VAL_181]]
 // CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_205]]
 // CHECK:         %[[VAL_183]] = add nuw i32 %[[VAL_182]], 1

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
index feeca4a272741..8338070b8bbc4 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-cancellation-point.mlir
@@ -21,14 +21,17 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
   %1 = llvm.mlir.constant(100 : i32) : i32
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
-      llvm.store %arg3, %arg2 : i32, !llvm.ptr
-      llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
-      omp.cancellation_point cancellation_construct_type(taskgroup)
-      llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
+        llvm.store %arg3, %arg2 : i32, !llvm.ptr
+        llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
+        omp.cancellation_point cancellation_construct_type(taskgroup)
+        llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -90,9 +93,11 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:       taskloop.body:                                    ; preds = %[[VAL_35:.*]]
 // CHECK:         %[[VAL_36:.*]] = getelementptr { i32 }, ptr %[[VAL_32]], i32 0, i32 0
 // CHECK:         br label %[[VAL_37:.*]]
-// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_34]]
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_34]]
+// CHECK:         br label %[[VAL_37_1:.*]]
+// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_37]]
 // CHECK:         br label %[[VAL_38:.*]]
-// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
+// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37_1]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_28]], %[[VAL_26]]
 // CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_30]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
@@ -108,8 +113,10 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_48]]
 // CHECK:         br label %[[OMP_LOOP_AFTER:omp_loop.after]]
 // CHECK:       omp_loop.after:                                   ; preds = %[[VAL_51]]
+// CHECK:         br label %[[CONT2:omp.region.cont2]]
+// CHECK:       omp.region.cont2:                                 ; preds = %[[OMP_LOOP_AFTER]]
 // CHECK:         br label %[[CONT:omp.region.cont]]
-// CHECK:       omp.region.cont:                                  ; preds = %[[FINI:.fini]], %[[OMP_LOOP_AFTER]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[FINI:.fini]], %[[CONT2]]
 // CHECK:         call void @_dealloc(ptr %[[VAL_36]])
 // CHECK:         tail call void @free(ptr %[[VAL_32]])
 // CHECK:         br label %[[VAL_55:.*]]
@@ -131,7 +138,7 @@ llvm.func @_QPtest(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm.
 // CHECK:       omp.loop_nest.region.split:                       ; preds = %[[LOOP_REGION]]
 // CHECK:         call void @_QPafter(ptr %[[VAL_36]])
 // CHECK:         br label %[[VAL_64:.*]]
-// CHECK:       omp.region.cont2:                                 ; preds = %[[VAL_62]]
+// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_62]]
 // CHECK:         br label %[[VAL_45]]
 // CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_64]]
 // CHECK:         %[[VAL_47]] = add nuw i32 %[[VAL_46]], 1
@@ -146,14 +153,17 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
   %2 = llvm.mlir.constant(1 : i64) : i64
   %3 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
   omp.taskgroup {
-    omp.taskloop nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
-      omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
-        llvm.store %arg3, %arg2 : i32, !llvm.ptr
-        llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
-        omp.cancellation_point cancellation_construct_type(taskgroup)
-        llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
-        omp.yield
+    omp.taskloop.context {
+      omp.taskloop nogroup private(@_QFtestEarg_firstprivate_i32 %arg0 -> %arg1, @_QFtestEi_private_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+        omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
+          llvm.store %arg3, %arg2 : i32, !llvm.ptr
+          llvm.call @_QPbefore(%arg1) : (!llvm.ptr) -> ()
+          omp.cancellation_point cancellation_construct_type(taskgroup)
+          llvm.call @_QPafter(%arg1) : (!llvm.ptr) -> ()
+          omp.yield
+        }
       }
+      omp.terminator
     }
     omp.terminator
   }
@@ -224,9 +234,11 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       taskloop.body:                                    ; preds = %[[VAL_105:.*]]
 // CHECK:         %[[VAL_106:.*]] = getelementptr { i32 }, ptr %[[VAL_102]], i32 0, i32 0
 // CHECK:         br label %[[VAL_107:.*]]
-// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_104]]
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_104]]
+// CHECK:         br label %[[VAL_107_1:.*]]
+// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_107]]
 // CHECK:         br label %[[VAL_108:.*]]
-// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107]]
+// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_107_1]]
 // CHECK:         %[[VAL_109:.*]] = sub i64 %[[VAL_98]], %[[VAL_96]]
 // CHECK:         %[[VAL_110:.*]] = sdiv i64 %[[VAL_109]], %[[VAL_100]]
 // CHECK:         %[[VAL_111:.*]] = add i64 %[[VAL_110]], 1
@@ -242,8 +254,10 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_118]]
 // CHECK:         br label %[[VAL_122:.*]]
 // CHECK:       omp_loop.after:                                   ; preds = %[[VAL_121]]
+// CHECK:         br label %[[VAL_123_1:omp.region.cont3]]
+// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_122]]
 // CHECK:         br label %[[VAL_123:omp.region.cont2]]
-// CHECK:       omp.region.cont2:                                 ; preds = %[[VAL_124:.fini]], %[[VAL_122]]
+// CHECK:       omp.region.cont2:                                 ; preds = %[[VAL_124:.fini]], %[[VAL_123_1]]
 // CHECK:         call void @_dealloc(ptr %[[VAL_106]])
 // CHECK:         tail call void @free(ptr %[[VAL_102]])
 // CHECK:         br label %[[VAL_125:.*]]
@@ -265,7 +279,7 @@ llvm.func @_QPtest2(%arg0: !llvm.ptr {fir.bindc_name = "arg", llvm.noalias, llvm
 // CHECK:       omp.loop_nest.region.split:                       ; preds = %[[VAL_128]]
 // CHECK:         call void @_QPafter(ptr %[[VAL_106]])
 // CHECK:         br label %[[VAL_134:.*]]
-// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_132]]
+// CHECK:       omp.region.cont4:                                 ; preds = %[[VAL_132]]
 // CHECK:         br label %[[VAL_115]]
 // CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_134]]
 // CHECK:         %[[VAL_117]] = add nuw i32 %[[VAL_116]], 1

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
index f63b6691d165a..51f1c295a83cf 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-collapse.mlir
@@ -20,15 +20,18 @@ llvm.func @_QPtest() {
   %c1_i32 = llvm.mlir.constant(1 :i32) : i32
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2, %arg3) : i32 = (%c1_i32, %c1_i32) to (%c10_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32) collapse(2) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2, %arg3) : i32 = (%c1_i32, %c1_i32) to (%c10_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32) collapse(2) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -72,15 +75,18 @@ llvm.func @_QPtest2() {
   %c2_i32 = llvm.mlir.constant(2 : i32) : i32
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2, %arg3, %arg4) : i32 = (%c1_i32, %c1_i32, %c2_i32) to (%c10_i32, %c5_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32, %c1_i32) collapse(3) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2, %arg3, %arg4) : i32 = (%c1_i32, %c1_i32, %c2_i32) to (%c10_i32, %c5_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32, %c1_i32) collapse(3) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -129,15 +135,18 @@ llvm.func @_QPtest3() {
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
   %c20_i32 = llvm.mlir.constant(20 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2, %arg3) : i32 = (%c10_i32, %c1_i32) to (%c20_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32) collapse(2) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2, %arg3) : i32 = (%c10_i32, %c1_i32) to (%c20_i32, %c5_i32) inclusive step (%c1_i32, %c1_i32) collapse(2) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -186,15 +195,18 @@ llvm.func @_QPtest4() {
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
   %c15_i32 = llvm.mlir.constant(15 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2, %arg3) : i32 = (%c2_i32, %c5_i32) to (%c10_i32, %c15_i32) inclusive step (%c2_i32, %c3_i32) collapse(2) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2, %arg3) : i32 = (%c2_i32, %c5_i32) to (%c10_i32, %c15_i32) inclusive step (%c2_i32, %c3_i32) collapse(2) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -245,15 +257,18 @@ llvm.func @_QPtest5() {
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
   %c15_i32 = llvm.mlir.constant(15 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2, %arg3) : i32 = (%cneg2_i32, %c5_i32) to (%c10_i32, %c15_i32) inclusive step (%c2_i32, %c3_i32) collapse(2) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2, %arg3) : i32 = (%cneg2_i32, %c5_i32) to (%c10_i32, %c15_i32) inclusive step (%c2_i32, %c3_i32) collapse(2) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -300,15 +315,18 @@ llvm.func @_QPtest6() {
   %c1_i32 = llvm.mlir.constant(1 :i32) : i32
   %c5_i32 = llvm.mlir.constant(5 : i32) : i32
   %c10_i32 = llvm.mlir.constant(10 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2, %arg3) : i32 = (%c10_i32, %c1_i32) to (%c5_i32, %c5_i32) inclusive step (%cneg1_i32, %c1_i32) collapse(2) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2, %arg3) : i32 = (%c10_i32, %c1_i32) to (%c5_i32, %c5_i32) inclusive step (%cneg1_i32, %c1_i32) collapse(2) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir
new file mode 100644
index 0000000000000..1685d0065a102
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-context-alloca.mlir
@@ -0,0 +1,49 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Check converting code where there are operations other than omp.taskloop
+// inside of omp.taskloop.context.
+
+// The following is a common pattern generated by flang when accessing
+// mutable boxes (e.g. ALLOCATABLE variables) inside of a taskloop.
+
+omp.private {type = private} @_QFtest_taskloopEi_private_i32 : i32
+
+llvm.func @_QPtest_taskloop(%arg0: !llvm.ptr) {
+  %0 = llvm.mlir.constant(48 : i32) : i32
+  %1 = llvm.mlir.constant(1 : i32) : i32
+  %2 = llvm.mlir.constant(100 : i32) : i32
+  %3 = llvm.mlir.constant(1 : i64) : i64
+  %4 = llvm.alloca %3 x i32 : (i64) -> !llvm.ptr
+  omp.taskloop.context {
+    // The CHECK lines below verify that this alloca is emitted in the outlined task function's omp.taskloop.context.region block.
+    %5 = llvm.alloca %1 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    omp.taskloop private(@_QFtest_taskloopEi_private_i32 %4 -> %arg1 : !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%1) to (%2) inclusive step (%1) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        "llvm.intr.memcpy"(%5, %arg0, %0) <{arg_attrs = [{llvm.align = 8 : i64}, {llvm.align = 8 : i64}, {}], isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+        %6 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+        %7 = llvm.load %6 : !llvm.ptr -> !llvm.ptr
+        llvm.call @_QPdo_something(%7) {fastmathFlags = #llvm.fastmath<contract>} : (!llvm.ptr) -> ()
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+llvm.func @_QPdo_something(!llvm.ptr) attributes {sym_visibility = "private"}
+
+// Outlined task function:
+// CHECK-LABEL: define internal void @_QPtest_taskloop..omp_par(
+// CHECK:       taskloop.alloca:
+//                ...
+// CHECK:         br label %[[VAL_31:taskloop.body]]
+
+// CHECK:       taskloop.body:                                    ; preds = %[[VAL_32:.*]]
+// CHECK-NEXT:    br label %[[VAL_33:omp.taskloop.context.region]]
+
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_31]]
+// This will be folded into the entry block by later optimization passes:
+// CHECK-NEXT:    %[[VAL_34:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
+// CHECK-NEXT:    br label %[[VAL_35:omp.taskloop.region]]

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
index 7034192612ba4..d06e110ce0631 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-final.mlir
@@ -20,15 +20,18 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %true = llvm.mlir.constant(true) : i1
-  omp.taskloop final(%true) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop final(%true) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
index 7b7451331ce30..ca0f93a95d33d 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-grainsize.mlir
@@ -20,15 +20,18 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %c2_i32 = llvm.mlir.constant(2 : i32) : i32
-  omp.taskloop grainsize(%c2_i32: i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop grainsize(%c2_i32: i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
index f47b588e60a7d..08644531dff0a 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-if.mlir
@@ -22,15 +22,18 @@ llvm.func @_QPtest() {
   %a_val = llvm.load %3 : !llvm.ptr -> i32
   %c20 = llvm.mlir.constant(20 : i32) : i32
   %cmp = llvm.icmp "slt" %a_val, %c20 : i32
-  omp.taskloop if(%cmp) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop if(%cmp) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
index ba46fc9794e74..0b94d6d7704d5 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-mergeable.mlir
@@ -19,15 +19,18 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop mergeable private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop mergeable private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index 4976b442c9852..45fed7ce5f432 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -13,11 +13,14 @@ llvm.func @_QPtest() {
   %3 = llvm.alloca %2 x i32 {bindc_name = "t2"} : (i64) -> !llvm.ptr
   %4 = llvm.alloca %2 x i32 {bindc_name = "t1"} : (i64) -> !llvm.ptr
   %5 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  omp.taskloop private(@_QFtestEt1_private_i32 %4 -> %arg0, @_QFtestEt2_private_i32 %3 -> %arg1, @_QFtestEi_private_i32 %5 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
-      llvm.store %arg3, %arg2 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEt1_private_i32 %4 -> %arg0, @_QFtestEt2_private_i32 %3 -> %arg1, @_QFtestEi_private_i32 %5 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg3) : i32 = (%0) to (%1) inclusive step (%0) {
+        llvm.store %arg3, %arg2 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -74,9 +77,11 @@ llvm.func @_QPtest() {
 // CHECK:         br label %[[VAL_35:.*]]
 // CHECK:       taskloop.body:                                    ; preds = %[[VAL_36:.*]]
 // CHECK:         br label %[[VAL_37:.*]]
-// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_35]]
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_35]]
+// CHECK:         br label %[[VAL_38_1:.*]]
+// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_37]]
 // CHECK:         br label %[[VAL_38:.*]]
-// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_37]]
+// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_38_1]]
 // CHECK:         %[[VAL_39:.*]] = sub i64 %[[VAL_27]], %[[VAL_25]]
 // CHECK:         %[[VAL_40:.*]] = sdiv i64 %[[VAL_39]], %[[VAL_29]]
 // CHECK:         %[[VAL_41:.*]] = add i64 %[[VAL_40]], 1
@@ -93,7 +98,9 @@ llvm.func @_QPtest() {
 // CHECK:         br label %[[VAL_52:.*]]
 // CHECK:       omp_loop.after:                                   ; preds = %[[VAL_51]]
 // CHECK:         br label %[[VAL_53:.*]]
-// CHECK:       omp.region.cont:                                  ; preds = %[[VAL_52]]
+// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_52]]
+// CHECK:         br label %[[VAL_53_1:.*]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[VAL_53_1]]
 // CHECK:         br label %[[VAL_54:.*]]
 // CHECK:       omp_loop.body:                                    ; preds = %[[VAL_48]]
 // CHECK:         %[[VAL_55:.*]] = mul i32 %[[VAL_46]], 1
@@ -102,12 +109,12 @@ llvm.func @_QPtest() {
 // CHECK:       omp.loop_nest.region:                             ; preds = %[[VAL_50]]
 // CHECK:         store i32 %[[VAL_56]], ptr %[[VAL_34]], align 4
 // CHECK:         br label %[[VAL_58:.*]]
-// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_57]]
+// CHECK:       omp.region.cont4:                                 ; preds = %[[VAL_57]]
 // CHECK:         br label %[[VAL_45]]
 // CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_58]]
 // CHECK:         %[[VAL_47]] = add nuw i32 %[[VAL_46]], 1
 // CHECK:         br label %[[VAL_44]]
-// CHECK:       taskloop.exit.exitStub:                           ; preds = %[[VAL_53]]
+// CHECK:       taskloop.exit.exitStub:                           ; preds = %[[VAL_53_1]]
 // CHECK:         ret void
 
 // CHECK-NOT: define internal void @omp_taskloop_dup

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
index b15b5721a67f8..18b9612360238 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-nogroup.mlir
@@ -19,15 +19,18 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop nogroup private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop nogroup private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
index d4481d1199a75..63bb8fcce172d 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-num_tasks.mlir
@@ -20,15 +20,18 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %c2_i32 = llvm.mlir.constant(2: i32) : i32
-  omp.taskloop num_tasks(%c2_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop num_tasks(%c2_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
index 16dc904788c97..90dde350a84ff 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-outer-bounds.mlir
@@ -22,11 +22,14 @@ llvm.func @_QPtest() {
   %lb = llvm.load %lb.addr : !llvm.ptr -> i32
   %ub = llvm.load %ub.addr : !llvm.ptr -> i32
   %step = llvm.load %step.addr : !llvm.ptr -> i32
-  omp.taskloop private(@_QFtestEi_private_i32 %i -> %arg0 : !llvm.ptr) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
-      llvm.store %iv, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEi_private_i32 %i -> %arg0 : !llvm.ptr) {
+      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+        llvm.store %iv, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
index 15618c9c9317a..fb92a66086e8f 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-priority.mlir
@@ -20,15 +20,18 @@ llvm.func @_QPtest() {
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
   %c1_i32 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop priority(%c1_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop priority(%c1_i32 : i32) private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
index c9c06bdf5cd43..2b44d3aceaa65 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-untied.mlir
@@ -19,15 +19,18 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop untied private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop untied private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -46,15 +49,18 @@ llvm.func @_QPtest_tied() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }

diff  --git a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
index d8f644de657fc..513f4c9609cf2 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop.mlir
@@ -19,15 +19,18 @@ llvm.func @_QPtest() {
   %7 = llvm.mlir.constant(1 : i32) : i32
   %8 = llvm.mlir.constant(5 : i32) : i32
   %9 = llvm.mlir.constant(1 : i32) : i32
-  omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
-    omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
-      llvm.store %arg2, %arg1 : i32, !llvm.ptr
-      %10 = llvm.load %arg0 : !llvm.ptr -> i32
-      %11 = llvm.mlir.constant(1 : i32) : i32
-      %12 = llvm.add %10, %11 : i32
-      llvm.store %12, %arg0 : i32, !llvm.ptr
-      omp.yield
+  omp.taskloop.context {
+    omp.taskloop private(@_QFtestEa_firstprivate_i32 %3 -> %arg0, @_QFtestEi_private_i32 %1 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%7) to (%8) inclusive step (%9) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %10 = llvm.load %arg0 : !llvm.ptr -> i32
+        %11 = llvm.mlir.constant(1 : i32) : i32
+        %12 = llvm.add %10, %11 : i32
+        llvm.store %12, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -92,9 +95,11 @@ llvm.func @_QPtest() {
 // CHECK:       taskloop.body:                                    ; preds = %[[VAL_36:.*]]
 // CHECK:         %[[VAL_37:.*]] = getelementptr { i32 }, ptr %[[VAL_33]], i32 0, i32 0
 // CHECK:         br label %[[VAL_38:.*]]
-// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_35]]
+// CHECK:       omp.taskloop.context.region:                      ; preds = %[[VAL_35]]
+// CHECK:         br label %[[VAL_38_1:.*]]
+// CHECK:       omp.taskloop.region:                              ; preds = %[[VAL_38]]
 // CHECK:         br label %[[VAL_39:.*]]
-// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_38]]
+// CHECK:       omp_loop.preheader:                               ; preds = %[[VAL_38_1]]
 // CHECK:         %[[VAL_40:.*]] = sub i64 %[[VAL_29]], %[[VAL_27]]
 // CHECK:         %[[VAL_41:.*]] = sdiv i64 %[[VAL_40]], %[[VAL_31]]
 // CHECK:         %[[VAL_42:.*]] = add i64 %[[VAL_41]], 1
@@ -111,7 +116,9 @@ llvm.func @_QPtest() {
 // CHECK:         br label %[[VAL_53:.*]]
 // CHECK:       omp_loop.after:                                   ; preds = %[[VAL_52]]
 // CHECK:         br label %[[VAL_54:.*]]
-// CHECK:       omp.region.cont:                                  ; preds = %[[VAL_53]]
+// CHECK:       omp.region.cont2:                                  ; preds = %[[VAL_53]]
+// CHECK:         br label %[[VAL_54_1:.*]]
+// CHECK:       omp.region.cont:                                  ; preds = %[[VAL_54]]
 // CHECK:         tail call void @free(ptr %[[VAL_33]])
 // CHECK:         br label %[[VAL_55:.*]]
 // CHECK:       omp_loop.body:                                    ; preds = %[[VAL_49]]
@@ -124,12 +131,12 @@ llvm.func @_QPtest() {
 // CHECK:         %[[VAL_60:.*]] = add i32 %[[VAL_59]], 1
 // CHECK:         store i32 %[[VAL_60]], ptr %[[VAL_37]], align 4
 // CHECK:         br label %[[VAL_61:.*]]
-// CHECK:       omp.region.cont2:                                 ; preds = %[[VAL_58]]
+// CHECK:       omp.region.cont3:                                 ; preds = %[[VAL_58]]
 // CHECK:         br label %[[VAL_46]]
 // CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_61]]
 // CHECK:         %[[VAL_48]] = add nuw i32 %[[VAL_47]], 1
 // CHECK:         br label %[[VAL_45]]
-// CHECK:       taskloop.exit.exitStub:                           ; preds = %[[VAL_54]]
+// CHECK:       taskloop.exit.exitStub:                           ; preds = %[[VAL_54_1]]
 // CHECK:         ret void
 
 // CHECK-LABEL: define internal void @omp_taskloop_dup(

diff  --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index ea7ec3cfc3bdb..c5925c0d5b970 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -331,12 +331,16 @@ llvm.func @taskgroup_task_reduction(%x : !llvm.ptr) {
 // -----
 
 llvm.func @taskloop_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
-  // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.taskloop operation}}
-  // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
+  // expected-error@below {{LLVM Translation failed for operation: omp.taskloop.context}}
+  omp.taskloop.context {
+    // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.taskloop operation}}
+    // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
+    omp.taskloop allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -353,12 +357,16 @@ llvm.func @taskloop_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr)
   }
 
 llvm.func @taskloop_inreduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
-  // expected-error@below {{not yet implemented: Unhandled clause in_reduction in omp.taskloop operation}}
-  // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop in_reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
+  // expected-error@below {{LLVM Translation failed for operation: omp.taskloop.context}}
+  omp.taskloop.context {
+    // expected-error@below {{not yet implemented: Unhandled clause in_reduction in omp.taskloop operation}}
+    // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
+    omp.taskloop in_reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
+      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -375,12 +383,16 @@ llvm.func @taskloop_inreduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.pt
   }
 
 llvm.func @taskloop_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
-  // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.taskloop operation}}
-  // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
-  omp.taskloop reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
-    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
-      omp.yield
+  // expected-error@below {{LLVM Translation failed for operation: omp.taskloop.context}}
+  omp.taskloop.context {
+    // expected-error@below {{not yet implemented: Unhandled clause reduction in omp.taskloop operation}}
+    // expected-error@below {{LLVM Translation failed for operation: omp.taskloop}}
+    omp.taskloop reduction(@add_reduction_i32 %x -> %arg0 : !llvm.ptr) {
+      omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+        omp.yield
+      }
     }
+    omp.terminator
   }
   llvm.return
 }


        


More information about the Mlir-commits mailing list