[Mlir-commits] [mlir] 1063dfc - [mlir][openmp] Added omp.taskloop
Shraiysh Vaishay
llvmlistbot at llvm.org
Mon Jul 4 00:20:19 PDT 2022
Author: Shraiysh Vaishay
Date: 2022-07-04T12:50:07+05:30
New Revision: 1063dfc0285362918aa6bce482a532cb911f64fb
URL: https://github.com/llvm/llvm-project/commit/1063dfc0285362918aa6bce482a532cb911f64fb
DIFF: https://github.com/llvm/llvm-project/commit/1063dfc0285362918aa6bce482a532cb911f64fb.diff
LOG: [mlir][openmp] Added omp.taskloop
This patch adds omp.taskloop operation to OpenMP Dialect along with
tests.
Reviewed By: peixin
Differential Revision: https://reviews.llvm.org/D127380
Added:
Modified:
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
mlir/test/Dialect/OpenMP/invalid.mlir
mlir/test/Dialect/OpenMP/ops.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index eae2882b6ea9..761e964644b0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -132,7 +132,10 @@ def ParallelOp : OpenMP_Op<"parallel", [
// TODO: remove this once emitAccessorPrefix is set to
// kEmitAccessorPrefix_Prefixed for the dialect.
/// Returns the reduction variables
- operand_range getReductionVars() { return reduction_vars(); }
+ SmallVector<Value> getReductionVars() {
+ return SmallVector<Value>(reduction_vars().begin(),
+ reduction_vars().end());
+ }
}];
}
@@ -237,7 +240,10 @@ def SectionsOp : OpenMP_Op<"sections", [AttrSizedOperandSegments,
// TODO: remove this once emitAccessorPrefix is set to
// kEmitAccessorPrefix_Prefixed for the dialect.
/// Returns the reduction variables
- operand_range getReductionVars() { return reduction_vars(); }
+ SmallVector<Value> getReductionVars() {
+ return SmallVector<Value>(reduction_vars().begin(),
+ reduction_vars().end());
+ }
}];
}
@@ -375,7 +381,10 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
// TODO: remove this once emitAccessorPrefix is set to
// kEmitAccessorPrefix_Prefixed for the dialect.
/// Returns the reduction variables
- operand_range getReductionVars() { return reduction_vars(); }
+ SmallVector<Value> getReductionVars() {
+ return SmallVector<Value>(reduction_vars().begin(),
+ reduction_vars().end());
+ }
}];
let hasCustomAssemblyFormat = 1;
let assemblyFormat = [{
@@ -393,7 +402,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
custom<ReductionVarList>(
$reduction_vars, type($reduction_vars), $reductions
) `)`
- ) `for` custom<WsLoopControl>($region, $lowerBound, $upperBound, $step,
+ ) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
type($step), $inclusive) attr-dict
}];
let hasVerifier = 1;
@@ -542,11 +551,172 @@ def TaskOp : OpenMP_Op<"task", [AttrSizedOperandSegments,
}];
let extraClassDeclaration = [{
/// Returns the reduction variables
- operand_range getReductionVars() { return in_reduction_vars(); }
+ SmallVector<Value> getReductionVars() {
+ return SmallVector<Value>(in_reduction_vars().begin(),
+ in_reduction_vars().end());
+ }
}];
let hasVerifier = 1;
}
+def TaskLoopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
+ AutomaticAllocationScope, RecursiveSideEffects,
+ AllTypesMatch<["lowerBound", "upperBound", "step"]>,
+ ReductionClauseInterface]> {
+ let summary = "taskloop construct";
+ let description = [{
+ The taskloop construct specifies that the iterations of one or more
+ associated loops will be executed in parallel using explicit tasks. The
+ iterations are distributed across tasks generated by the construct and
+ scheduled to be executed.
+
+ The `lowerBound` and `upperBound` specify a half-open range: the range
+ includes the lower bound but does not include the upper bound. If the
+ `inclusive` attribute is specified then the upper bound is also included.
+ The `step` specifies the loop step.
+
+ The body region can contain any number of blocks.
+
+ ```
+ omp.taskloop <clauses>
+ for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+ %a = load %arrA[%i1, %i2] : memref<?x?xf32>
+ %b = load %arrB[%i1, %i2] : memref<?x?xf32>
+ %sum = arith.addf %a, %b : f32
+ store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
+ omp.terminator
+ }
+ ```
+
+ For definitions of "undeferred task", "included task", "final task" and
+ "mergeable task", please check OpenMP Specification.
+
+ When an `if` clause is present on a taskloop construct, and if the `if`
+ clause expression evaluates to `false`, undeferred tasks are generated. The
+ use of a variable in an `if` clause expression of a taskloop construct
+ causes an implicit reference to the variable in all enclosing constructs.
+
+ When a `final` clause is present on a taskloop construct and the `final`
+ clause expression evaluates to `true`, the generated tasks will be final
+ tasks. The use of a variable in a `final` clause expression of a taskloop
+ construct causes an implicit reference to the variable in all enclosing
+ constructs.
+
+ If the `untied` clause is specified, all tasks generated by the taskloop
+ construct are untied tasks.
+
+ When the `mergeable` clause is present on a taskloop construct, each
+ generated task is a mergeable task.
+
+ Reductions can be performed in a loop by specifying reduction accumulator
+ variables in `reduction_vars` or `in_reduction_vars` and symbols referring
+ to reduction declarations in the `reductions` or `in_reductions` attribute.
+ Each reduction is identified by the accumulator it uses and accumulators
+ must not be repeated in the same reduction. The `omp.reduction` operation
+ accepts the accumulator and a partial value which is considered to be
+ produced by the current loop iteration for the given reduction. If multiple
+ values are produced for the same accumulator, i.e. there are multiple
+ `omp.reduction`s, the last value is taken. The reduction declaration
+ specifies how to combine the values from each iteration into the final
+ value, which is available in the accumulator after the loop completes.
+
+ If an `in_reduction` clause is present on the taskloop construct, the
+ behavior is as if each generated task was defined by a task construct on
+ which an `in_reduction` clause with the same reduction operator and list
+ items is present. Thus, the generated tasks are participants of a reduction
+ previously defined by a reduction scoping clause.
+
+ If a `reduction` clause is present on the taskloop construct, the behavior
+ is as if a `task_reduction` clause with the same reduction operator and list
+ items was applied to the implicit taskgroup construct enclosing the taskloop
+ construct. The taskloop construct executes as if each generated task was
+ defined by a task construct on which an `in_reduction` clause with the same
+ reduction operator and list items is present. Thus, the generated tasks are
+ participants of the reduction defined by the `task_reduction` clause that
+ was applied to the implicit taskgroup construct.
+
+ When a `priority` clause is present on a taskloop construct, the generated
+ tasks use the `priority-value` as if it was specified for each individual
+ task. If the `priority` clause is not specified, tasks generated by the
+ taskloop construct have the default task priority (zero).
+
+ The `allocators_vars` and `allocate_vars` arguments are a variadic list of
+ values that specify the memory allocator to be used to obtain storage for
+ private values.
+
+ If a `grainsize` clause is present on the taskloop construct, the number of
+ logical loop iterations assigned to each generated task is greater than or
+ equal to the minimum of the value of the grain-size expression and the
+ number of logical loop iterations, but less than two times the value of the
+ grain-size expression.
+
+ If `num_tasks` is specified, the taskloop construct creates as many tasks as
+ the minimum of the num-tasks expression and the number of logical loop
+ iterations. Each task must have at least one logical loop iteration.
+
+ By default, the taskloop construct executes as if it was enclosed in a
+ taskgroup construct with no statements or directives outside of the taskloop
+ construct. Thus, the taskloop construct creates an implicit taskgroup
+ region. If the `nogroup` clause is present, no implicit taskgroup region is
+ created.
+ }];
+
+ let arguments = (ins Variadic<IntLikeType>:$lowerBound,
+ Variadic<IntLikeType>:$upperBound,
+ Variadic<IntLikeType>:$step,
+ UnitAttr:$inclusive,
+ Optional<I1>:$if_expr,
+ Optional<I1>:$final_expr,
+ UnitAttr:$untied,
+ UnitAttr:$mergeable,
+ Variadic<OpenMP_PointerLikeType>:$in_reduction_vars,
+ OptionalAttr<SymbolRefArrayAttr>:$in_reductions,
+ Variadic<OpenMP_PointerLikeType>:$reduction_vars,
+ OptionalAttr<SymbolRefArrayAttr>:$reductions,
+ Optional<IntLikeType>:$priority,
+ Variadic<AnyType>:$allocate_vars,
+ Variadic<AnyType>:$allocators_vars,
+ Optional<IntLikeType>: $grain_size,
+ Optional<IntLikeType>: $num_tasks,
+ UnitAttr: $nogroup);
+
+ let regions = (region AnyRegion:$region);
+
+ let assemblyFormat = [{
+ oilist(`if` `(` $if_expr `)`
+ |`final` `(` $final_expr `)`
+ |`untied` $untied
+ |`mergeable` $mergeable
+ |`in_reduction` `(`
+ custom<ReductionVarList>(
+ $in_reduction_vars, type($in_reduction_vars), $in_reductions
+ ) `)`
+ |`reduction` `(`
+ custom<ReductionVarList>(
+ $reduction_vars, type($reduction_vars), $reductions
+ ) `)`
+ |`priority` `(` $priority `:` type($priority) `)`
+ |`allocate` `(`
+ custom<AllocateAndAllocator>(
+ $allocate_vars, type($allocate_vars),
+ $allocators_vars, type($allocators_vars)
+ ) `)`
+ |`grain_size` `(` $grain_size `:` type($grain_size) `)`
+ |`num_tasks` `(` $num_tasks `:` type($num_tasks) `)`
+ |`nogroup` $nogroup
+ ) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step,
+ type($step), $inclusive) attr-dict
+ }];
+
+ let extraClassDeclaration = [{
+ /// Returns the reduction variables
+ SmallVector<Value> getReductionVars();
+ void getEffects(SmallVectorImpl<MemoryEffects::EffectInstance> &effects);
+ }];
+
+ let hasVerifier = 1;
+}
+
def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments,
ReductionClauseInterface,
AutomaticAllocationScope]> {
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 83180e03455e..d1dc9bd86bea 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -40,7 +40,7 @@ def ReductionClauseInterface : OpInterface<"ReductionClauseInterface"> {
let methods = [
InterfaceMethod<
- "Get reduction vars", "::mlir::Operation::operand_range",
+ "Get reduction vars", "::mlir::SmallVector<::mlir::Value>",
"getReductionVars">,
];
}
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 2f4989082b8b..d09ef969c2f6 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -523,11 +523,11 @@ LogicalResult SingleOp::verify() {
/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps
/// steps := `step` `(`ssa-id-list`)`
ParseResult
-parseWsLoopControl(OpAsmParser &parser, Region &region,
- SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
- SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
- SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
- SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
+parseLoopControl(OpAsmParser &parser, Region &region,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
+ SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
// Parse an opening `(` followed by induction variables followed by `)`
SmallVector<OpAsmParser::Argument> ivs;
Type loopVarType;
@@ -557,10 +557,10 @@ parseWsLoopControl(OpAsmParser &parser, Region &region,
return parser.parseRegion(region, ivs);
}
-void printWsLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
- ValueRange lowerBound, ValueRange upperBound,
- ValueRange steps, TypeRange loopVarTypes,
- UnitAttr inclusive) {
+void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
+ ValueRange lowerBound, ValueRange upperBound,
+ ValueRange steps, TypeRange loopVarTypes,
+ UnitAttr inclusive) {
auto args = region.front().getArguments();
p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
<< ") to (" << upperBound << ") ";
@@ -736,6 +736,43 @@ LogicalResult TaskGroupOp::verify() {
task_reduction_vars());
}
+//===----------------------------------------------------------------------===//
+// TaskLoopOp
+//===----------------------------------------------------------------------===//
+SmallVector<Value> TaskLoopOp::getReductionVars() {
+ SmallVector<Value> all_reduction_nvars(in_reduction_vars().begin(),
+ in_reduction_vars().end());
+ all_reduction_nvars.insert(all_reduction_nvars.end(),
+ reduction_vars().begin(), reduction_vars().end());
+ return all_reduction_nvars;
+}
+
+LogicalResult TaskLoopOp::verify() {
+ if (allocate_vars().size() != allocators_vars().size())
+ return emitError(
+ "expected equal sizes for allocate and allocator variables");
+ if (failed(verifyReductionVarList(*this, reductions(), reduction_vars())) ||
+ failed(
+ verifyReductionVarList(*this, in_reductions(), in_reduction_vars())))
+ return failure();
+
+ if (reduction_vars().size() > 0 && nogroup())
+ return emitError("if a reduction clause is present on the taskloop "
+ "directive, the nogroup clause must not be specified");
+ for (auto var : reduction_vars()) {
+ if (llvm::is_contained(in_reduction_vars(), var))
+ return emitError("the same list item cannot appear in both a reduction "
+ "and an in_reduction clause");
+ }
+
+ if (grain_size() && num_tasks()) {
+ return emitError(
+ "the grainsize clause and num_tasks clause are mutually exclusive and "
+ "may not appear on the same taskloop directive");
+ }
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// WsLoopOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index c08b80e5b79e..27ba2c88a789 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -1294,3 +1294,128 @@ func.func @omp_cancellationpoint2() {
}
return
}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testmemref = "test.memref"() : () -> (memref<i32>)
+ // expected-error @below {{expected equal sizes for allocate and allocator variables}}
+ "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({
+ ^bb0(%arg3: i32, %arg4: i32):
+ "omp.terminator"() : () -> ()
+ }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, memref<i32>) -> ()
+ return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+ "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
+ ^bb0(%arg3: i32, %arg4: i32):
+ "omp.terminator"() : () -> ()
+ }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
+ return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+ "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({
+ ^bb0(%arg3: i32, %arg4: i32):
+ "omp.terminator"() : () -> ()
+ }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
+ return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+ "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({
+ ^bb0(%arg3: i32, %arg4: i32):
+ "omp.terminator"() : () -> ()
+ }) {in_reductions = [@add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>, !llvm.ptr<f32>) -> ()
+ return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ // expected-error @below {{expected as many reduction symbol references as reduction variables}}
+ "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32_2) ({
+ ^bb0(%arg3: i32, %arg4: i32):
+ "omp.terminator"() : () -> ()
+ }) {in_reductions = [@add_f32, @add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr<f32>) -> ()
+ return
+}
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+ %0 = arith.constant 0.0 : f32
+ omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+ %1 = arith.addf %arg0, %arg1 : f32
+ omp.yield (%1 : f32)
+}
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}}
+ omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>) nogroup
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ omp.terminator
+ }
+ return
+}
+
+// -----
+
+omp.reduction.declare @add_f32 : f32
+init {
+^bb0(%arg: f32):
+ %0 = arith.constant 0.0 : f32
+ omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+ %1 = arith.addf %arg0, %arg1 : f32
+ omp.yield (%1 : f32)
+}
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}}
+ omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ omp.terminator
+ }
+ return
+}
+
+// -----
+
+func.func @taskloop(%lb: i32, %ub: i32, %step: i32) {
+ %testi64 = "test.i64"() : () -> (i64)
+ // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}}
+ omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ omp.terminator
+ }
+ return
+}
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 5c8c0d83c24b..b4dccd72aa02 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s
func.func @omp_barrier() -> () {
// CHECK: omp.barrier
@@ -1394,8 +1394,6 @@ func.func @omp_task(%bool_var: i1, %i64_var: i64, %i32_var: i32, %data_var: memr
return
}
-// -----
-
func.func @omp_threadprivate() {
%0 = arith.constant 1 : i32
%1 = arith.constant 2 : i32
@@ -1528,3 +1526,141 @@ func.func @omp_taskgroup_multiple_tasks() -> () {
}
return
}
+
+// CHECK-LABEL: @omp_taskloop
+func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
+
+ // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+ omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) {
+ omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) {
+ // CHECK: test.op1
+ "test.op1"(%lb) : (i32) -> ()
+ // CHECK: test.op2
+ "test.op2"() : () -> ()
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) inclusive step (%{{.+}}, %{{.+}}) {
+ omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) inclusive step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ %testbool = "test.bool"() : () -> (i1)
+
+ // CHECK: omp.taskloop if(%{{[^)]+}})
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop if(%testbool)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop final(%{{[^)]+}})
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop final(%testbool)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop untied
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop untied
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop mergeable
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop mergeable
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ %testf32 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ %testf32_2 = "test.f32"() : () -> (!llvm.ptr<f32>)
+ // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>, @add_f32 -> %{{.+}} : !llvm.ptr<f32>)
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>, @add_f32 -> %testf32_2 : !llvm.ptr<f32>)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>) reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>)
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr<f32>) reduction(@add_f32 -> %testf32_2 : !llvm.ptr<f32>)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ %testi32 = "test.i32"() : () -> (i32)
+ // CHECK: omp.taskloop priority(%{{[^:]+}}: i32)
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop priority(%testi32: i32)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ %testmemref = "test.memref"() : () -> (memref<i32>)
+ // CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>)
+ omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>)
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ %testi64 = "test.i64"() : () -> (i64)
+ // CHECK: omp.taskloop grain_size(%{{[^:]+}}: i64)
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop grain_size(%testi64: i64)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64)
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop num_tasks(%testi64: i64)
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.taskloop nogroup
+ // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) {
+ omp.taskloop nogroup
+ for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: return
+ return
+}
More information about the Mlir-commits
mailing list