[Mlir-commits] [mlir] 1063dfc - [mlir][openmp] Added omp.taskloop

Shraiysh Vaishay llvmlistbot at llvm.org
Mon Jul 4 00:20:19 PDT 2022


Author: Shraiysh Vaishay
Date: 2022-07-04T12:50:07+05:30
New Revision: 1063dfc0285362918aa6bce482a532cb911f64fb

URL: https://github.com/llvm/llvm-project/commit/1063dfc0285362918aa6bce482a532cb911f64fb
DIFF: https://github.com/llvm/llvm-project/commit/1063dfc0285362918aa6bce482a532cb911f64fb.diff

LOG: [mlir][openmp] Added omp.taskloop

This patch adds the omp.taskloop operation to the OpenMP dialect, along
with tests.

Reviewed By: peixin

Differential Revision: https://reviews.llvm.org/D127380

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
    mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
    mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
    mlir/test/Dialect/OpenMP/invalid.mlir
    mlir/test/Dialect/OpenMP/ops.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index eae2882b6ea9..761e964644b0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -132,7 +132,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
     // TODO: remove this once emitAccessorPrefix is set to
     // kEmitAccessorPrefix_Prefixed for the dialect.
     /// Returns the reduction variables
-    operand_range getReductionVars() { return reduction_vars(); }
+    SmallVector<Value> getReductionVars() {
+      return SmallVector<Value>(reduction_vars().begin(),
+                                reduction_vars().end());
+    }
   }];
 }
 
@@ -237,7 +240,10 @@ def SectionsOp : OpenMP_Op<"sections", [AttrSizedOperandSegments,
     // TODO: remove this once emitAccessorPrefix is set to
     // kEmitAccessorPrefix_Prefixed for the dialect.
     /// Returns the reduction variables
-    operand_range getReductionVars() { return reduction_vars(); }
+    SmallVector<Value> getReductionVars() {
+      return SmallVector<Value>(reduction_vars().begin(),
+                                reduction_vars().end());
+    }
   }];
 }
 
@@ -375,7 +381,10 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
     // TODO: remove this once emitAccessorPrefix is set to
     // kEmitAccessorPrefix_Prefixed for the dialect.
     /// Returns the reduction variables
-    operand_range getReductionVars() { return reduction_vars(); }
+    SmallVector<Value> getReductionVars() {
+      return SmallVector<Value>(reduction_vars().begin(),
+                                reduction_vars().end());
+    }
   }];
   let hasCustomAssemblyFormat = 1;
   let assemblyFormat = [{
@@ -393,7 +402,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
               custom<ReductionVarList>(
                 $reduction_vars, type($reduction_vars), $reductions
               ) `)`
-    ) `for` custom<WsLoopControl>($region, $lowerBound, $upperBound, $step,
+    ) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
                                   type($step), $inclusive) attr-dict
   }];
   let hasVerifier = 1;
@@ -542,11 +551,172 @@ def TaskOp : OpenMP_Op<"task", [AttrSizedOperandSegments,
   }];
   let extraClassDeclaration = [{
     /// Returns the reduction variables
-    operand_range getReductionVars() { return in_reduction_vars(); }
+    SmallVector<Value> getReductionVars() {
+      return SmallVector<Value>(in_reduction_vars().begin(),
+                                in_reduction_vars().end());
+    }
   }];
   let hasVerifier = 1;
 }
 
+def TaskLoopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
+                           AutomaticAllocationScope, RecursiveSideEffects,
+                           AllTypesMatch<["lowerBound", "upperBound", "step"]>,
+                           ReductionClauseInterface]> {
+  let summary = "taskloop construct";
+  let description = [{
+    The taskloop construct specifies that the iterations of one or more
+    associated loops will be executed in parallel using explicit tasks. The
+    iterations are distributed across tasks generated by the construct and
+    scheduled to be executed.
+
+    The `lowerBound` and `upperBound` specify a half-open range: the range
+    includes the lower bound but does not include the upper bound. If the
+    `inclusive` attribute is specified then the upper bound is also included.
+    The `step` specifies the loop step.
+
+    The body region can contain any number of blocks.
+
+    ```
+    omp.taskloop <clauses>
+    for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+      %a = load %arrA[%i1, %i2] : memref<?x?xf32>
+      %b = load %arrB[%i1, %i2] : memref<?x?xf32>
+      %sum = arith.addf %a, %b : f32
+      store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
+      omp.terminator
+    }
+    ```
+
+    For definitions of "undeferred task", "included task", "final task" and
+    "mergeable task", please check OpenMP Specification.
+
+    When an `if` clause is present on a taskloop construct, and if the `if`
+    clause expression evaluates to `false`, undeferred tasks are generated. The
+    use of a variable in an `if` clause expression of a taskloop construct
+    causes an implicit reference to the variable in all enclosing constructs.
+
+    When a `final` clause is present on a taskloop construct and the `final`
+    clause expression evaluates to `true`, the generated tasks will be final
+    tasks. The use of a variable in a `final` clause expression of a taskloop
+    construct causes an implicit reference to the variable in all enclosing
+    constructs.
+
+    If the `untied` clause is specified, all tasks generated by the taskloop
+    construct are untied tasks.
+
+    When the `mergeable` clause is present on a taskloop construct, each
+    generated task is a mergeable task.
+
+    Reductions can be performed in a loop by specifying reduction accumulator
+    variables in `reduction_vars` or `in_reduction_vars` and symbols referring
+    to reduction declarations in the `reductions` or `in_reductions` attribute.
+    Each reduction is identified by the accumulator it uses and accumulators
+    must not be repeated in the same reduction. The `omp.reduction` operation
+    accepts the accumulator and a partial value which is considered to be
+    produced by the current loop iteration for the given reduction. If multiple
+    values are produced for the same accumulator, i.e. there are multiple
+    `omp.reduction`s, the last value is taken. The reduction declaration
+    specifies how to combine the values from each iteration into the final
+    value, which is available in the accumulator after the loop completes.
+
+    If an `in_reduction` clause is present on the taskloop construct, the
+    behavior is as if each generated task was defined by a task construct on
+    which an `in_reduction` clause with the same reduction operator and list
+    items is present. Thus, the generated tasks are participants of a reduction
+    previously defined by a reduction scoping clause.
+
+    If a `reduction` clause is present on the taskloop construct, the behavior
+    is as if a `task_reduction` clause with the same reduction operator and list
+    items was applied to the implicit taskgroup construct enclosing the taskloop
+    construct. The taskloop construct executes as if each generated task was
+    defined by a task construct on which an `in_reduction` clause with the same
+    reduction operator and list items is present. Thus, the generated tasks are
+    participants of the reduction defined by the `task_reduction` clause that
+    was applied to the implicit taskgroup construct.
+
+    When a `priority` clause is present on a taskloop construct, the generated
+    tasks use the `priority-value` as if it was specified for each individual
+    task. If the `priority` clause is not specified, tasks generated by the
+    taskloop construct have the default task priority (zero).
+
+    The `allocators_vars` and `allocate_vars` arguments are a variadic list of
+    values that specify the memory allocator to be used to obtain storage for
+    private values.
+
+    If a `grainsize` clause is present on the taskloop construct, the number of
+    logical loop iterations assigned to each generated task is greater than or
+    equal to the minimum of the value of the grain-size expression and the
+    number of logical loop iterations, but less than two times the value of the
+    grain-size expression.
+
+    If `num_tasks` is specified, the taskloop construct creates as many tasks as
+    the minimum of the num-tasks expression and the number of logical loop
+    iterations. Each task must have at least one logical loop iteration.
+
+    By default, the taskloop construct executes as if it was enclosed in a
+    taskgroup construct with no statements or directives outside of the taskloop
+    construct. Thus, the taskloop construct creates an implicit taskgroup
+    region. If the `nogroup` clause is present, no implicit taskgroup region is
+    created.
+  }];
+
+  let arguments = (ins Variadic<IntLikeType>:$lowerBound,
+                       Variadic<IntLikeType>:$upperBound,
+                       Variadic<IntLikeType>:$step,
+                       UnitAttr:$inclusive,
+                       Optional<I1>:$if_expr,
+                       Optional<I1>:$final_expr,
+                       UnitAttr:$untied,
+                       UnitAttr:$mergeable,
+                       Variadic<OpenMP_PointerLikeType>:$in_reduction_vars,
+                       OptionalAttr<SymbolRefArrayAttr>:$in_reductions,
+                       Variadic<OpenMP_PointerLikeType>:$reduction_vars,
+                       OptionalAttr<SymbolRefArrayAttr>:$reductions,
+                       Optional<IntLikeType>:$priority,
+                       Variadic<AnyType>:$allocate_vars,
+                       Variadic<AnyType>:$allocators_vars,
+                       Optional<IntLikeType>: $grain_size,
+                       Optional<IntLikeType>: $num_tasks,
+                       UnitAttr: $nogroup);
+
+  let regions = (region AnyRegion:$region);
+
+  let assemblyFormat = [{
+    oilist(`if` `(` $if_expr `)`
+          |`final` `(` $final_expr `)`
+          |`untied` $untied
+          |`mergeable` $mergeable
+          |`in_reduction` `(`
+              custom<ReductionVarList>(
+                $in_reduction_vars, type($in_reduction_vars), $in_reductions
+              ) `)`
+          |`reduction` `(`
+              custom<ReductionVarList>(
+                $reduction_vars, type($reduction_vars), $reductions
+              ) `)`
+          |`priority` `(` $priority `:` type($priority) `)`
+          |`allocate` `(`
+              custom<AllocateAndAllocator>(
+                $allocate_vars, type($allocate_vars),
+                $allocators_vars, type($allocators_vars)
+              ) `)`
+          |`grain_size` `(` $grain_size `:` type($grain_size) `)`
+          |`num_tasks` `(` $num_tasks `:` type($num_tasks) `)`
+          |`nogroup` $nogroup
+    ) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
+                                  type($step), $inclusive) attr-dict
+  }];
+
+  let extraClassDeclaration = [{
+    /// Returns in_reduction_vars followed by reduction_vars, as one list.
+    SmallVector<Value> getReductionVars();
+    void getEffects(SmallVectorImpl<MemoryEffects::EffectInstance> &effects);
+  }];
+
+  let hasVerifier = 1;
+}
+
 def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments,
                             ReductionClauseInterface,
                             AutomaticAllocationScope]> {

diff  --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 83180e03455e..d1dc9bd86bea 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -40,7 +40,7 @@ def ReductionClauseInterface : OpInterface<"ReductionClauseInterface"> {
 
   let methods = [
     InterfaceMethod<
-      "Get reduction vars", "::mlir::Operation::operand_range",
+      "Get reduction vars", "::mlir::SmallVector<::mlir::Value>",
       "getReductionVars">,
   ];
 }

diff  --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 2f4989082b8b..d09ef969c2f6 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -523,11 +523,11 @@ LogicalResult SingleOp::verify() {
 /// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps
 /// steps := `step` `(`ssa-id-list`)`
 ParseResult
-parseWsLoopControl(OpAsmParser &parser, Region &region,
-                   SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
-                   SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
-                   SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
-                   SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
+parseLoopControl(OpAsmParser &parser, Region &region,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
+                 SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
+                 SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
   // Parse an opening `(` followed by induction variables followed by `)`
   SmallVector<OpAsmParser::Argument> ivs;
   Type loopVarType;
@@ -557,10 +557,10 @@ parseWsLoopControl(OpAsmParser &parser, Region &region,
   return parser.parseRegion(region, ivs);
 }
 
-void printWsLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
-                        ValueRange lowerBound, ValueRange upperBound,
-                        ValueRange steps, TypeRange loopVarTypes,
-                        UnitAttr inclusive) {
+void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
+                      ValueRange lowerBound, ValueRange upperBound,
+                      ValueRange steps, TypeRange loopVarTypes,
+                      UnitAttr inclusive) {
   auto args = region.front().getArguments();
   p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
     << ") to (" << upperBound << ") ";
@@ -736,6 +736,43 @@ LogicalResult TaskGroupOp::verify() {
                                 task_reduction_vars());
 }
 
+//===----------------------------------------------------------------------===//
+// TaskLoopOp
+//===----------------------------------------------------------------------===//
+SmallVector<Value> TaskLoopOp::getReductionVars() {
+  // The in_reduction operands come first, then the reduction operands.
+  SmallVector<Value> combinedVars;
+  combinedVars.append(in_reduction_vars().begin(), in_reduction_vars().end());
+  combinedVars.append(reduction_vars().begin(), reduction_vars().end());
+  return combinedVars;
+}
+
+LogicalResult TaskLoopOp::verify() {
+  if (allocate_vars().size() != allocators_vars().size())
+    return emitError(
+        "expected equal sizes for allocate and allocator variables");
+  // Check the reduction list first, then the in_reduction list.
+  if (failed(verifyReductionVarList(*this, reductions(), reduction_vars())))
+    return failure();
+  if (failed(
+          verifyReductionVarList(*this, in_reductions(), in_reduction_vars())))
+    return failure();
+  // `nogroup` is rejected whenever a reduction clause is present.
+  if (nogroup() && !reduction_vars().empty())
+    return emitError("if a reduction clause is present on the taskloop "
+                     "directive, the nogroup clause must not be specified");
+  // No value may be listed under both reduction and in_reduction.
+  for (Value accumulator : reduction_vars())
+    if (llvm::is_contained(in_reduction_vars(), accumulator))
+      return emitError("the same list item cannot appear in both a reduction "
+                       "and an in_reduction clause");
+  if (grain_size() && num_tasks())
+    return emitError(
+        "the grainsize clause and num_tasks clause are mutually exclusive and "
+        "may not appear on the same taskloop directive");
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // WsLoopOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index c08b80e5b79e..27ba2c88a789 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -1294,3 +1294,128 @@ func.func @omp_cancellationpoint2() {
   }
   return
 }
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testmemref = "test.memref"() : () -> (memref<i32>)
+  // expected-error @below {{expected equal sizes for allocate and allocator variables}}
+  "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({
+  ^bb0(%arg3: i32, %arg4: i32):
+    "omp.terminator"() : () -> ()
+  }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, memref<i32>) -> ()
+  return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
+  ^bb0(%arg3: i32, %arg4: i32):
+    "omp.terminator"() : () -> ()
+  }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
+  return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({
+  ^bb0(%arg3: i32, %arg4: i32):
+    "omp.terminator"() : () -> ()
+  }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
+  return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
+  ^bb0(%arg3: i32, %arg4: i32):
+    "omp.terminator"() : () -> ()
+  }) {in_reductions = [@add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
+  return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+  "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32_2) ({
+  ^bb0(%arg3: i32, %arg4: i32):
+    "omp.terminator"() : () -> ()
+  }) {in_reductions = [@add_f32, @add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
+  return
+}
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = arith.constant 0.0 : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = arith.addf %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
+  omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>) nogroup
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = arith.constant 0.0 : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = arith.addf %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
+  omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+  %testi64 = "test.i64"() : () -> (i64)
+  // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
+  omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    omp.terminator
+  }
+  return
+}

diff  --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 5c8c0d83c24b..b4dccd72aa02 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s
 
 func.func @omp_barrier() -> () {
   // CHECK: omp.barrier
@@ -1394,8 +1394,6 @@ func.func @omp_task(%bool_var: i1, %i64_var: i64, %i32_var: i32, %data_var: memr
   return
 }
 
-// -----
-
 func.func @omp_threadprivate() {
   %0 = arith.constant 1 : i32
   %1 = arith.constant 2 : i32
@@ -1528,3 +1526,141 @@ func.func @omp_taskgroup_multiple_tasks() -> () {
   }
   return
 }
+
+// CHECK-LABEL: @omp_taskloop
+func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
+
+  // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+  omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step)  {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+  omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step)  {
+    // CHECK: test.op1
+    "test.op1"(%lb) : (i32) -> ()
+    // CHECK: test.op2
+    "test.op2"() : () -> ()
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) inclusive step (%{{.+}}, %{{.+}}) {
+  omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) inclusive step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  %testbool = "test.bool"() : () -> (i1)
+
+  // CHECK: omp.taskloop if(%{{[^)]+}})
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop if(%testbool)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop final(%{{[^)]+}})
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop final(%testbool)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop untied
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop untied
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop mergeable
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop mergeable
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+  // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>) reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>)
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) reduction(@add_f32 -> %testf32_2 : !llvm.ptr<f32>)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  %testi32 = "test.i32"() : () -> (i32)
+  // CHECK: omp.taskloop priority(%{{[^:]+}}: i32)
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop priority(%testi32: i32)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  %testmemref = "test.memref"() : () -> (memref<i32>)
+  // CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>)
+  omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>)
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  %testi64 = "test.i64"() : () -> (i64)
+  // CHECK: omp.taskloop grain_size(%{{[^:]+}}: i64)
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop grain_size(%testi64: i64)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64)
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop num_tasks(%testi64: i64)
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.taskloop nogroup
+  // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+  omp.taskloop nogroup
+  for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: return
+  return
+}


        


More information about the Mlir-commits mailing list