[Mlir-commits] [mlir] [MLIR][OpenMP] Add canonical loop LLVM-IR lowering (PR #147069)
Michael Kruse
llvmlistbot at llvm.org
Thu Jul 10 06:57:45 PDT 2025
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/147069
>From 4a18faad39910ea4d4b66185ba25488ea905e019 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 3 Jul 2025 10:51:55 +0200
Subject: [PATCH 1/3] Emit named operand indices
---
mlir/test/mlir-tblgen/op-operand.td | 11 +++++++++++
mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 11 +++++++++++
2 files changed, 22 insertions(+)
diff --git a/mlir/test/mlir-tblgen/op-operand.td b/mlir/test/mlir-tblgen/op-operand.td
index a2fa1f7046a97..ab8d721ed5427 100644
--- a/mlir/test/mlir-tblgen/op-operand.td
+++ b/mlir/test/mlir-tblgen/op-operand.td
@@ -13,6 +13,9 @@ def OpA : NS_Op<"one_normal_operand_op", []> {
let arguments = (ins I32:$input);
}
+// DECL-LABEL: class OpA : {{.*}} {
+// DECL: static constexpr int odsIndex_input = 0;
+
// CHECK-LABEL: OpA definitions
// CHECK: void OpA::build
@@ -28,6 +31,9 @@ def OpB : NS_Op<"one_variadic_operand_op", []> {
let arguments = (ins Variadic<I32>:$input);
}
+// DECL-LABEL: class OpB : {{.*}} {
+// DECL: static constexpr int odsIndex_input = 0;
+
// CHECK-LABEL: OpB::build
// CHECK: ::mlir::ValueRange input
// CHECK-NOT: assert
@@ -37,6 +43,11 @@ def OpD : NS_Op<"mix_variadic_and_normal_inputs_op", [SameVariadicOperandSize]>
let arguments = (ins Variadic<AnyTensor>:$input1, AnyTensor:$input2, Variadic<AnyTensor>:$input3);
}
+// DECL-LABEL: class OpD : {{.*}} {
+// DECL: static constexpr int odsIndex_input1 = 0;
+// DECL: static constexpr int odsIndex_input2 = 1;
+// DECL: static constexpr int odsIndex_input3 = 2;
+
// DECL-LABEL: ::mlir::Operation::operand_range getInput1
// DECL-NEXT: return getODSOperands(0);
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 6008ed4673d1b..cbb4030f3adb4 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -2223,6 +2223,17 @@ generateNamedOperandGetters(const Operator &op, Class &opClass,
"'SameVariadicOperandSize' traits");
}
+ // Print the ods names so they don't need to be hardcoded in the source.
+ for (int i = 0; i != numOperands; ++i) {
+ const auto &operand = op.getOperand(i);
+ if (operand.name.empty())
+ continue;
+
+ opClass.declare<Field>("static constexpr int", Twine("odsIndex_") +
+ operand.name + " = " +
+ Twine(i));
+ }
+
// First emit a few "sink" getter methods upon which we layer all nicer named
// getter methods.
// If generating for an adaptor, the method is put into the non-templated
>From c7ed06d92dc7e256bff5370eb3d74d3a43547625 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Fri, 4 Jul 2025 14:50:46 +0200
Subject: [PATCH 2/3] Add omp.canonical_loop and omp.unroll modelling
---
.../mlir/Dialect/OpenMP/OpenMPDialect.h | 5 +
.../mlir/Dialect/OpenMP/OpenMPOpBase.td | 11 +
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 207 ++++++++++
.../Dialect/OpenMP/OpenMPOpsInterfaces.td | 86 +++++
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 353 ++++++++++++++++++
.../OpenMP/cli-canonical_loop-invalid.mlir | 50 +++
.../Dialect/OpenMP/cli-canonical_loop.mlir | 157 ++++++++
.../Dialect/OpenMP/cli-unroll-heuristic.mlir | 59 +++
8 files changed, 928 insertions(+)
create mode 100644 mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir
create mode 100644 mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
create mode 100644 mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
index ab11a6094e3e7..7cf738352ba47 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
@@ -37,4 +37,9 @@
#define GET_OP_CLASSES
#include "mlir/Dialect/OpenMP/OpenMPOps.h.inc"
+namespace mlir::omp {
+/// Find the omp.new_cli, generator, and consumer of a canonical loop info.
+std::tuple<NewCliOp, OpOperand *, OpOperand *> decodeCli(mlir::Value cli);
+} // namespace mlir::omp
+
#endif // MLIR_DIALECT_OPENMP_OPENMPDIALECT_H_
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
index f3dd44d2c0717..bbcfb87fa03c6 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
@@ -204,4 +204,15 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
let regions = !if(singleRegion, (region AnyRegion:$region), (region));
}
+
+// Base class for OpenMP loop transformations (that either consume or generate
+// loops)
+//
+// Doesn't actually create a C++ base class (only defines default values for
+// tablegen classes that derive from this). Use LoopTransformationInterface
+// instead for common operations.
+class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
+ OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits) > {
+}
+
#endif // OPENMP_OP_BASE
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index ac80926053a2d..703384a0680d0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -22,6 +22,7 @@ include "mlir/Dialect/OpenMP/OpenMPOpBase.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/EnumAttr.td"
+include "mlir/IR/OpAsmInterface.td"
include "mlir/IR/OpBase.td"
include "mlir/IR/SymbolInterfaces.td"
@@ -356,6 +357,212 @@ def SingleOp : OpenMP_Op<"single", traits = [
let hasVerifier = 1;
}
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+ let summary = "Type for representing a reference to a canonical loop";
+ let description = [{
+ A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+ canonical loop in the same function. Values of this type are not
+ available at runtime and therefore cannot be used by the program itself,
+ i.e. an opaque type. It is similar to the transform dialect's
+ `!transform.interface` type, but instead of implementing an interface
+ for each transformation, the OpenMP dialect itself defines possible
+ operations on this type.
+
+ A value of type CanonicalLoopInfoType (in the following: CLI) can be
+
+ 1. created by omp.new_cli.
+ 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
+ can only be associated once.
+ 3. passed to an omp loop transformation operation that modifies the loop
+ associated with the CLI. The CLI is the "applyee" and the operation is
+ the consumer. A CLI can only be consumed once.
+ 4. passed to an omp loop transformation operation to associate the cli with
+ a result of that transformation. The CLI is the "generatee" and the
+ operation is the generator.
+
+ A CLI cannot
+
+ 1. be returned from a function.
+ 2. be passed to operations that are not specifically designed to take a
+ CanonicalLoopInfoType, including AnyType.
+
+ A CLI directly corresponds to an object of
+ OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+ }];
+}
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Creation
+//===---------------------------------------------------------------------===//
+
+def NewCliOp : OpenMP_Op<"new_cli",
+ [DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
+ let summary = "Create a new Canonical Loop Info value.";
+ let description = [{
+ Create a new CLI that can be passed as an argument to a CanonicalLoopOp
+ and to loop transformation operations to handle dependencies between
+ loop transformation operations.
+ }];
+
+ let arguments = (ins );
+ let results = (outs CanonicalLoopInfoType:$result);
+ let assemblyFormat = [{
+ attr-dict
+ }];
+
+ let builders = [
+ OpBuilder<(ins )>,
+ ];
+
+ let hasVerifier = 1;
+}
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Operation
+//===---------------------------------------------------------------------===//
+def CanonicalLoopOp : OpenMPTransform_Op<"canonical_loop",
+ [DeclareOpInterfaceMethods<OpAsmOpInterface, [ "getAsmBlockNames", "getAsmBlockArgumentNames"]>]> {
+ let summary = "OpenMP Canonical Loop Operation";
+ let description = [{
+ All loops that conform to OpenMP's definition of a canonical loop can be
+ simplified to a CanonicalLoopOp. In particular, there are no loop-carried
+ variables and the number of iterations it will execute is known before the
+ operation. This allows e.g. to determine the number of threads and chunks
+ the iterations space is split into before executing any iteration. More
+ restrictions may apply in cases such as (collapsed) loop nests, doacross
+ loops, etc.
+
+ In contrast to other loop operations such as `scf.for`, the number of
+ iterations is determined by only a single variable, the trip-count. The
+ induction variable value is the logical iteration number of that iteration,
+ which OpenMP defines to be between 0 and the trip-count (exclusive).
+ Loop representations having lower-bound, upper-bound, and step-size operands
+ require passes to do more work than necessary, including handling special
+ cases such as upper-bound smaller than lower-bound, upper-bound equal to
+ the integer type's maximal value, negative step size, etc. This complexity
+ is better handled only once by the front-end, which can apply its semantics
+ for such cases while still being able to represent any kind of loop, which
+ is kind of the point of a mid-end intermediate representation. User-defined
+ types such as random-access iterators in C++ could not directly be
+ represented anyway.
+
+ The induction variable is always of the same type as the tripcount argument.
+ Since it can never be negative, tripcount is always interpreted as an
+ unsigned integer. It is the caller's responsibility to ensure the tripcount
+ is not negative when its interpretation is signed, i.e.
+ `%tripcount = max(0,%tripcount)`.
+
+ An optional argument to an omp.canonical_loop that can be passed in
+ is a CanonicalLoopInfo value that can be used to refer to the canonical
+ loop to apply transformations -- such as tiling, unrolling, or
+ work-sharing -- to the loop, similar to the transform dialect but
+ with OpenMP-specific semantics. Because it is optional, it has to be the
+ last of the operands, but appears first in the pretty format printing.
+
+ The pretty assembly format is inspired by python syntax, where `range(n)`
+ returns an iterator that runs from $0$ to $n-1$. The pretty assembly syntax
+ is one of:
+
+ omp.canonical_loop(%cli) %iv : !type in range(%tripcount)
+ omp.canonical_loop %iv : !type in range(%tripcount)
+
+ A CanonicalLoopOp is lowered to LLVM-IR using
+ `OpenMPIRBuilder::createCanonicalLoop`.
+
+ #### Examples
+
+ Translation from lower-bound, upper-bound, step-size to trip-count.
+ ```c
+ for (int i = 3; i < 42; i+=2) {
+ B[i] = A[i];
+ }
+ ```
+
+ ```mlir
+ %lb = arith.constant 3 : i32
+ %ub = arith.constant 42 : i32
+ %step = arith.constant 2 : i32
+ %range = arith.subi %ub, %lb : i32
+ %tripcount = arith.ceildivui %range, %step : i32
+ omp.canonical_loop %iv : i32 in range(%tripcount) {
+ %offset = arith.muli %iv, %step : i32
+ %i = arith.addi %offset, %lb : i32
+ %a = load %arrA[%i] : memref<?xf32>
+ store %a, %arrB[%i] : memref<?xf32>
+ }
+ ```
+
+ Nested canonical loop with transformation of the inner loop.
+ ```mlir
+ %outer = omp.new_cli : !omp.cli
+ %inner = omp.new_cli : !omp.cli
+ omp.canonical_loop(%outer) %iv1 : i32 in range(%tc1) {
+ omp.canonical_loop(%inner) %iv2 : i32 in range(%tc2) {
+ %a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
+ store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
+ }
+ }
+ omp.unroll_full(%inner)
+ ```
+ }];
+
+
+ let arguments = (ins IntLikeType:$tripCount,
+ Optional<CanonicalLoopInfoType>:$cli);
+ let regions = (region AnyRegion:$region);
+
+ let extraClassDeclaration = [{
+ ::mlir::Value getInductionVar();
+ }];
+
+ let builders = [
+ OpBuilder<(ins "::mlir::Value":$tripCount)>,
+ OpBuilder<(ins "::mlir::Value":$tripCount, "::mlir::Value":$cli)>,
+ ];
+
+ let hasCustomAssemblyFormat = 1;
+ let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// OpenMP unroll_heuristic operation
+//===----------------------------------------------------------------------===//
+
+def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
+ let summary = "OpenMP heuristic unroll operation";
+ let description = [{
+ Represents a `#pragma omp unroll` construct introduced in OpenMP 5.1.
+
+ The operation has one applyee and no generatees. The applyee is unrolled
+ according to implementation-defined heuristics. Implementations may choose
+ to not unroll the loop, partially unroll by a chosen factor, or fully
+ unroll it. Even if the implementation chooses to partially unroll the
+ applyee, the resulting unrolled loop is not accessible as a generatee. Use
+ omp.unroll_partial if a generatee is required.
+
+ The lowering is implemented using `OpenMPIRBuilder::unrollLoopHeuristic`,
+ which just attaches `llvm.loop.unroll.enable` metadata to the loop so the
+ unrolling is carried out by LLVM's LoopUnroll pass. That is, unrolling is
+ only actually performed in optimized builds.
+
+ Assembly formats:
+ omp.unroll_heuristic(%cli)
+ omp.unroll_heuristic(%cli) -> ()
+ }];
+
+ let arguments = (ins CanonicalLoopInfoType:$applyee);
+
+ let builders = [
+ OpBuilder<(ins "::mlir::Value":$cli)>,
+ ];
+
+ let hasCustomAssemblyFormat = 1;
+}
+
//===----------------------------------------------------------------------===//
// 2.8.3 Workshare Construct
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 92bf34ef3145f..8d794918d57d5 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -551,4 +551,90 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
];
}
+def LoopTransformationInterface : OpInterface<"LoopTransformationInterface"> {
+  let description = [{
+    Methods that are common for OpenMP loop transformation operations.
+  }];
+
+  let cppNamespace = "::mlir::omp";
+
+  let methods = [
+
+    InterfaceMethod<
+      /*description=*/[{
+        Get the indices for the arguments that represent CanonicalLoopInfo
+        applyees, i.e. loops that are transformed/consumed by this operation.
+      }],
+      /*returnType=*/ "std::pair<unsigned, unsigned>",
+      /*methodName=*/ "getApplyeesODSOperandIndexAndLength",
+      /*args=*/(ins)
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Get the indices for the arguments that represent CanonicalLoopInfo
+        generatees, i.e. loops that are created by this operation.
+      }],
+      /*returnType=*/ "std::pair<unsigned, unsigned>",
+      /*methodName=*/ "getGenerateesODSOperandIndexAndLength",
+      /*args=*/(ins)
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return the number of applyees of this loop transformation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "getNumApplyees",
+      /*args=*/ (ins),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        return $_op.getApplyeesODSOperandIndexAndLength().second;
+      }]
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return the number of generatees of this loop transformation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "getNumGeneratees",
+      /*args=*/ (ins),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        return $_op.getGenerateesODSOperandIndexAndLength().second;
+      }]
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return whether the operand with the given operand number is an
+        applyee of this operation.
+      }],
+      /*returnType=*/ "bool",
+      /*methodName=*/ "isApplyee",
+      /*args=*/ (ins "unsigned":$opnum),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        auto applyeeArgs = $_op.getApplyeesODSOperandIndexAndLength();
+        return (applyeeArgs.first <= opnum && opnum < applyeeArgs.first + applyeeArgs.second) ;
+      }]
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return whether the operand with the given operand number is a
+        generatee of this operation.
+      }],
+      /*returnType=*/ "bool",
+      /*methodName=*/ "isGeneratee",
+      /*args=*/ (ins "unsigned":$opnum),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        auto generateeArgs = $_op.getGenerateesODSOperandIndexAndLength();
+        return (generateeArgs.first <= opnum && opnum < generateeArgs.first + generateeArgs.second) ;
+      }]
+    >,
+
+  ];
+}
+
#endif // OPENMP_OPS_INTERFACES
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index e94d570b57122..abd2e92e344ba 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SmallString.h"
@@ -3012,6 +3013,358 @@ void LoopNestOp::gatherWrappers(
}
}
+//===----------------------------------------------------------------------===//
+// OpenMP canonical loop handling
+//===----------------------------------------------------------------------===//
+
+std::tuple<NewCliOp, OpOperand *, OpOperand *>
+mlir::omp ::decodeCli(Value cli) {
+
+ // Defining a CLI for a generated loop is optional; if there is none then
+ // there is no follow-up transformation
+ if (!cli)
+ return {{}, nullptr, nullptr};
+
+ MLIRContext *ctx = cli.getContext();
+ assert(cli.getType() == CanonicalLoopInfoType::get(ctx) &&
+ "Unexpected type of cli");
+
+ NewCliOp create = cast<NewCliOp>(cli.getDefiningOp());
+ OpOperand *gen = nullptr;
+ OpOperand *cons = nullptr;
+ for (OpOperand &use : cli.getUses()) {
+ auto op = cast<LoopTransformationInterface>(use.getOwner());
+
+ unsigned opnum = use.getOperandNumber();
+ if (op.isGeneratee(opnum)) {
+ assert(!gen && "Each CLI may have at most one def");
+ gen = &use;
+ } else if (op.isApplyee(opnum)) {
+ assert(!cons && "Each CLI may have at most one consumer");
+ cons = &use;
+ } else {
+ llvm_unreachable("Unexpected operand for a CLI");
+ }
+ }
+
+ return {create, gen, cons};
+}
+
+void NewCliOp::build(::mlir::OpBuilder &odsBuilder,
+ ::mlir::OperationState &odsState) {
+ odsState.addTypes(CanonicalLoopInfoType::get(odsBuilder.getContext()));
+}
+
+/// Suggest an SSA name for the CLI result, derived from the operation that
+/// generates the loop it refers to (if any).
+void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
+  Value result = getResult();
+  auto [newCli, gen, cons] = decodeCli(result);
+
+  // Derive the CLI variable name from its generator:
+  //  * "canonloop" for omp.canonical_loop
+  //  * custom name for loop transformation generatees
+  //  * "cli" as fallback if no generator
+  //  * "_s<idx>" suffix for each nesting level, where <idx> is the sequential
+  //    order among region-holding operations at that level
+  //  * "_r<idx>" suffix for parent operations with multiple regions, where
+  //    <idx> is the index of that region
+  std::string cliName{"cli"};
+  if (gen) {
+    cliName =
+        TypeSwitch<Operation *, std::string>(gen->getOwner())
+            .Case([&](CanonicalLoopOp op) {
+              // Find the canonical loop nesting: For each ancestor add a
+              // "_s<idx>" suffix (in reverse order)
+              SmallVector<std::string> components;
+              Operation *o = op.getOperation();
+              while (o) {
+                if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+                  break;
+
+                Region *r = o->getParentRegion();
+                if (!r)
+                  break;
+
+                auto getSequentialIndex = [](Region *r, Operation *o) {
+                  llvm::ReversePostOrderTraversal<Block *> traversal(
+                      &r->getBlocks().front());
+                  size_t idx = 0;
+                  for (Block *b : traversal) {
+                    for (Operation &op : *b) {
+                      if (&op == o)
+                        return idx;
+                      // Only consider operations that are containers as
+                      // possible children
+                      if (!op.getRegions().empty())
+                        idx += 1;
+                    }
+                  }
+                  llvm_unreachable("Operation not part of the region");
+                };
+                size_t sequentialIdx = getSequentialIndex(r, o);
+                components.push_back(("s" + Twine(sequentialIdx)).str());
+
+                Operation *parent = r->getParentOp();
+                if (!parent)
+                  break;
+
+                // If the operation has more than one region, also count in
+                // which of the regions
+                if (parent->getRegions().size() > 1) {
+                  auto getRegionIndex = [](Operation *o, Region *r) {
+                    for (auto [idx, region] :
+                         llvm::enumerate(o->getRegions())) {
+                      if (&region == r)
+                        return idx;
+                    }
+                    llvm_unreachable("Region not child its parent operation");
+                  };
+                  size_t regionIdx = getRegionIndex(parent, r);
+                  components.push_back(("r" + Twine(regionIdx)).str());
+                }
+
+                // next parent
+                o = parent;
+              }
+
+              // Components were collected innermost-first; emit them
+              // outermost-first. Bind by reference to avoid copying each one.
+              SmallString<64> Name("canonloop");
+              for (const std::string &s : reverse(components)) {
+                Name += '_';
+                Name += s;
+              }
+
+              return Name;
+            })
+            .Case([&](UnrollHeuristicOp op) -> std::string {
+              llvm_unreachable("heuristic unrolling does not generate a loop");
+            })
+            .Default([&](Operation *op) {
+              assert(!"TODO: Custom name for this operation");
+              return "transformed";
+            });
+  }
+
+  setNameFn(result, cliName);
+}
+
+LogicalResult NewCliOp::verify() {
+ Value cli = getResult();
+
+ MLIRContext *ctx = cli.getContext();
+ assert(cli.getType() == CanonicalLoopInfoType::get(ctx) &&
+ "Unexpected type of cli");
+
+ // Check that the CLI is used in at most one generator and one consumer
+ OpOperand *gen = nullptr;
+ OpOperand *cons = nullptr;
+ for (mlir::OpOperand &use : cli.getUses()) {
+ auto op = cast<mlir::omp::LoopTransformationInterface>(use.getOwner());
+
+ unsigned opnum = use.getOperandNumber();
+ if (op.isGeneratee(opnum)) {
+ if (gen) {
+ InFlightDiagnostic error =
+ emitOpError("CLI must have at most one generator");
+ error.attachNote(gen->getOwner()->getLoc())
+ .append("first generator here:");
+ error.attachNote(use.getOwner()->getLoc())
+ .append("second generator here:");
+ return error;
+ }
+
+ gen = &use;
+ } else if (op.isApplyee(opnum)) {
+ if (cons) {
+ InFlightDiagnostic error =
+ emitOpError("CLI must have at most one consumer");
+ error.attachNote(cons->getOwner()->getLoc())
+ .append("first consumer here:")
+ .appendOp(*cons->getOwner(),
+ OpPrintingFlags().printGenericOpForm());
+ error.attachNote(use.getOwner()->getLoc())
+ .append("second consumer here:")
+ .appendOp(*use.getOwner(), OpPrintingFlags().printGenericOpForm());
+ return error;
+ }
+
+ cons = &use;
+ } else {
+ llvm_unreachable("Unexpected operand for a CLI");
+ }
+ }
+
+ // If the CLI is source of a transformation, it must have a generator
+ if (cons && !gen) {
+ InFlightDiagnostic error = emitOpError("CLI has no generator");
+ error.attachNote(cons->getOwner()->getLoc())
+ .append("see consumer here: ")
+ .appendOp(*cons->getOwner(), OpPrintingFlags().printGenericOpForm());
+ return error;
+ }
+
+ return success();
+}
+
+/// Build a canonical loop that has no associated CLI.
+///
+/// Only the trip count is added as an operand; the optional `cli` operand is
+/// left out entirely, matching what the custom parser produces when no CLI is
+/// given. The (initially empty) body region is added last.
+void CanonicalLoopOp::build(OpBuilder &odsBuilder, OperationState &odsState,
+                            Value tripCount) {
+  odsState.addOperands(tripCount);
+  // An absent optional operand is represented by omitting it. Appending a
+  // default-constructed Value here would add a second, null operand.
+  (void)odsState.addRegion();
+}
+
+void CanonicalLoopOp::build(OpBuilder &odsBuilder, OperationState &odsState,
+ Value tripCount, ::mlir::Value cli) {
+ odsState.addOperands(tripCount);
+ odsState.addOperands(cli);
+ (void)odsState.addRegion();
+}
+
+void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
+ setNameFn(&getRegion().front(), "body_entry");
+}
+
+void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
+ OpAsmSetValueNameFn setNameFn) {
+ setNameFn(region.getArgument(0), "iv");
+}
+
+void CanonicalLoopOp::print(OpAsmPrinter &p) {
+ if (getCli())
+ p << '(' << getCli() << ')';
+ p << ' ' << getInductionVar() << " : " << getInductionVar().getType()
+ << " in range(" << getTripCount() << ") ";
+
+ p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/true);
+
+ p.printOptionalAttrDict((*this)->getAttrs());
+}
+
+mlir::ParseResult CanonicalLoopOp::parse(::mlir::OpAsmParser &parser,
+ ::mlir::OperationState &result) {
+ CanonicalLoopInfoType cliType =
+ CanonicalLoopInfoType::get(parser.getContext());
+
+ // Parse (optional) omp.cli identifier
+ OpAsmParser::UnresolvedOperand cli;
+ SmallVector<mlir::Value, 1> cliOperand;
+ if (!parser.parseOptionalLParen()) {
+ if (parser.parseOperand(cli) ||
+ parser.resolveOperand(cli, cliType, cliOperand) || parser.parseRParen())
+ return failure();
+ }
+
+ // We derive the type of tripCount from inductionVariable. MLIR requires the
+ // type of tripCount to be known when calling resolveOperand, so we have to
+ // parse the type before processing the inductionVariable.
+ OpAsmParser::Argument inductionVariable;
+ OpAsmParser::UnresolvedOperand tripcount;
+ if (parser.parseArgument(inductionVariable, /*allowType*/ true) ||
+ parser.parseKeyword("in") || parser.parseKeyword("range") ||
+ parser.parseLParen() || parser.parseOperand(tripcount) ||
+ parser.parseRParen() ||
+ parser.resolveOperand(tripcount, inductionVariable.type, result.operands))
+ return failure();
+
+ // Parse the loop body.
+ Region *region = result.addRegion();
+ if (parser.parseRegion(*region, {inductionVariable}))
+ return failure();
+
+ // We parsed the cli operand first, but because it is optional, it must be
+ // last in the operand list.
+ result.operands.append(cliOperand);
+
+ // Parse the optional attribute list.
+ if (parser.parseOptionalAttrDict(result.attributes))
+ return failure();
+
+ return mlir::success();
+}
+
+LogicalResult CanonicalLoopOp::verify() {
+ // The region's entry must accept the induction variable
+ // It can also be empty if just created
+ if (!getRegion().empty()) {
+ Region &region = getRegion();
+ if (region.getNumArguments() != 1)
+ return emitOpError(
+ "Canonical loop region must have exactly one argument");
+
+ if (getInductionVar().getType() != getTripCount().getType())
+ return emitOpError(
+ "Region argument must be the same type as the trip count");
+ }
+
+ return success();
+}
+
+Value CanonicalLoopOp::getInductionVar() { return getRegion().getArgument(0); }
+
+std::pair<unsigned, unsigned>
+CanonicalLoopOp::getApplyeesODSOperandIndexAndLength() {
+ // No applyees
+ return {0, 0};
+}
+
+std::pair<unsigned, unsigned>
+CanonicalLoopOp::getGenerateesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_cli);
+}
+
+//===----------------------------------------------------------------------===//
+// UnrollHeuristicOp
+//===----------------------------------------------------------------------===//
+
+void UnrollHeuristicOp::build(::mlir::OpBuilder &odsBuilder,
+ ::mlir::OperationState &odsState,
+ ::mlir::Value cli) {
+ odsState.addOperands(cli);
+}
+
+void UnrollHeuristicOp::print(OpAsmPrinter &p) {
+ p << '(' << getApplyee() << ')';
+
+ p.printOptionalAttrDict((*this)->getAttrs());
+}
+
+mlir::ParseResult UnrollHeuristicOp::parse(::mlir::OpAsmParser &parser,
+ ::mlir::OperationState &result) {
+ auto cliType = CanonicalLoopInfoType::get(parser.getContext());
+
+ if (parser.parseLParen())
+ return failure();
+
+ OpAsmParser::UnresolvedOperand applyee;
+ if (parser.parseOperand(applyee) ||
+ parser.resolveOperand(applyee, cliType, result.operands))
+ return failure();
+
+ if (parser.parseRParen())
+ return failure();
+
+ // Optional output loop (full unrolling has none)
+ if (!parser.parseOptionalArrow()) {
+ if (parser.parseLParen() || parser.parseRParen())
+ return failure();
+ }
+
+ // Parse the optional attribute list.
+ if (parser.parseOptionalAttrDict(result.attributes))
+ return failure();
+
+ return mlir::success();
+}
+
+std::pair<unsigned, unsigned>
+UnrollHeuristicOp ::getApplyeesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_applyee);
+}
+
+std::pair<unsigned, unsigned>
+UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() {
+ return {0, 0};
+}
+
//===----------------------------------------------------------------------===//
// Critical construct (2.17.1)
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir
new file mode 100644
index 0000000000000..345c53d2890b8
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir
@@ -0,0 +1,50 @@
+// RUN: mlir-opt %s -verify-diagnostics
+
+
+func.func @omp_canonloop_block_arg(%tc : i32) -> () {
+ // expected-error @below {{Canonical loop region must have exactly one argument}}
+ "omp.canonical_loop" (%tc) ({
+ ^bb0(%iv: i32, %somearg: i32):
+ omp.terminator
+ }) : (i32) -> ()
+ return
+}
+
+
+func.func @omp_canonloop_multiple_generators(%tc : i32) -> () {
+ // expected-error @below {{'omp.new_cli' op CLI must have at most one generator}}
+ %cli = omp.new_cli
+ // expected-note @below {{second generator here}}
+ omp.canonical_loop(%cli) %iv1 : i32 in range(%tc) {
+ omp.terminator
+ }
+ // expected-note @below {{first generator here}}
+ omp.canonical_loop(%cli) %iv2 : i32 in range(%tc) {
+ omp.terminator
+ }
+ return
+}
+
+
+func.func @omp_canonloop_multiple_consumers() -> () {
+ // expected-error @below {{'omp.new_cli' op CLI must have at most one consumer}}
+ %cli = omp.new_cli
+ %tc = llvm.mlir.constant(4 : i32) : i32
+ omp.canonical_loop(%cli) %iv1 : i32 in range(%tc) {
+ omp.terminator
+ }
+ // expected-note @below {{second consumer here}}
+ omp.unroll_heuristic(%cli)
+ // expected-note @below {{first consumer here}}
+ omp.unroll_heuristic(%cli)
+ return
+}
+
+
+func.func @omp_canonloop_no_generator() -> () {
+ // expected-error @below {{'omp.new_cli' op CLI has no generator}}
+ %cli = omp.new_cli
+ // expected-note @below {{see consumer here}}
+ omp.unroll_heuristic(%cli)
+ return
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
new file mode 100644
index 0000000000000..adadb8bbac49d
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
@@ -0,0 +1,157 @@
+// RUN: mlir-opt %s | FileCheck %s
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+
+
+// CHECK-LABEL: @omp_canonloop_raw(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_raw(%tc : i32) -> () {
+ // CHECK: omp.canonical_loop %iv : i32 in range(%[[tc]]) {
+ "omp.canonical_loop" (%tc) ({
+ ^bb0(%iv: i32):
+ // CHECK-NEXT: = llvm.add %iv, %iv : i32
+ %newval = llvm.add %iv, %iv : i32
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }) : (i32) -> ()
+ // CHECK-NEXT: return
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_raw(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ "omp.canonical_loop" (%tc, %canonloop_s0) ({
+ ^bb_first(%iv_first: i32):
+ // CHECK-NEXT: = llvm.add %iv, %iv : i32
+ %newval = llvm.add %iv_first, %iv_first : i32
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }) : (i32, !omp.cli) -> ()
+
+ // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+ %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
+ "omp.canonical_loop" (%tc, %canonloop_s1) ({
+ ^bb_second(%iv_second: i32):
+ // CHECK: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }) : (i32, !omp.cli) -> ()
+
+ // CHECK-NEXT: return
+ return
+}
+
+
+// CHECK-LABEL: @omp_nested_canonloop_raw(
+// CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32)
+func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %outer = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ %inner = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) {
+ "omp.canonical_loop" (%tc_outer, %outer) ({
+ ^bb_outer(%iv_outer: i32):
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) {
+ "omp.canonical_loop" (%tc_inner, %inner) ({
+ ^bb_inner(%iv_inner: i32):
+ // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32
+ %newval = llvm.add %iv_outer, %iv_inner: i32
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }) : (i32, !omp.cli) -> ()
+ // CHECK: omp.terminator
+ omp.terminator
+ }) : (i32, !omp.cli) -> ()
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: omp.canonical_loop %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop %iv : i32 in range(%tc) {
+ // CHECK-NEXT: llvm.add %iv, %iv : i32
+ %newval = llvm.add %iv, %iv: i32
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_constant_pretty()
+func.func @omp_canonloop_constant_pretty() -> () {
+ // CHECK-NEXT: %[[tc:.+]] = llvm.mlir.constant(4 : i32) : i32
+ %tc = llvm.mlir.constant(4 : i32) : i32
+ // CHECK-NEXT: omp.canonical_loop %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop %iv : i32 in range(%tc) {
+ // CHECK-NEXT: llvm.add %iv, %iv : i32
+ %newval = llvm.add %iv, %iv: i32
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: %canonloop_s1 = omp.new_cli
+ %canonloop_s1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ %canonloop_s0_s0 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ return
+}
+
+
+// CHECK-LABEL: @omp_newcli_unused(
+// CHECK-SAME: )
+func.func @omp_newcli_unused() -> () {
+ // CHECK-NEXT: %cli = omp.new_cli
+ %cli = omp.new_cli
+ // CHECK-NEXT: return
+ return
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
new file mode 100644
index 0000000000000..cda7d0b500166
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
@@ -0,0 +1,59 @@
+// RUN: mlir-opt %s | FileCheck %s
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+
+
+// CHECK-LABEL: @omp_unroll_heuristic_raw(
+// CHECK-SAME: %[[tc:.+]]: i32) {
+func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ "omp.canonical_loop" (%tc, %canonloop) ({
+ ^bb0(%iv: i32):
+ omp.terminator
+ }) : (i32, !omp.cli) -> ()
+ // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ "omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> ()
+ return
+}
+
+
+// CHECK-LABEL: @omp_unroll_heuristic_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32) {
+func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+ %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+ // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ omp.unroll_heuristic(%canonloop)
+ return
+}
+
+
+// CHECK-LABEL: @omp_unroll_heuristic_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32) {
+func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %cli_outer = omp.new_cli
+ // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ %cli_inner = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ omp.unroll_heuristic(%cli_outer)
+ // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0)
+ omp.unroll_heuristic(%cli_inner)
+ return
+}
>From da2613d525deb4edcf0fac41e865ca0510c75210 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Fri, 4 Jul 2025 16:26:20 +0200
Subject: [PATCH 3/3] omp.canonical_loop and omp.unroll_heuristic lowering
---
.../mlir/Target/LLVMIR/ModuleTranslation.h | 43 +++++
.../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 10 +
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 78 ++++++++
.../LLVMIR/openmp-cli-canonical_loop.mlir | 175 ++++++++++++++++++
.../LLVMIR/openmp-cli-unroll-heuristic01.mlir | 56 ++++++
.../LLVMIR/openmp-cli-unroll-heuristic02.mlir | 93 ++++++++++
6 files changed, 455 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir
create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir
create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index 79e8bb6add0da..5d52cf3f04b6a 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -15,6 +15,7 @@
#define MLIR_TARGET_LLVMIR_MODULETRANSLATION_H
#include "mlir/Dialect/LLVMIR/LLVMInterfaces.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/IR/Value.h"
@@ -24,6 +25,7 @@
#include "mlir/Target/LLVMIR/TypeToLLVM.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/FPEnv.h"
namespace llvm {
@@ -108,6 +110,41 @@ class ModuleTranslation {
return blockMapping.lookup(block);
}
+ /// Find the LLVM-IR loop that represents an MLIR loop.
+ llvm::CanonicalLoopInfo *lookupOMPLoop(omp::NewCliOp mlir) const {
+ llvm::CanonicalLoopInfo *result = loopMapping.lookup(mlir);
+ assert(result && "attempt to get non-existing loop");
+ return result;
+ }
+
+ /// Find the LLVM-IR loop that represents an MLIR loop.
+ llvm::CanonicalLoopInfo *lookupOMPLoop(Value mlir) const {
+ return lookupOMPLoop(mlir.getDefiningOp<omp::NewCliOp>());
+ }
+
+ /// Mark an OpenMP loop as having been consumed.
+ void invalidateOmpLoop(omp::NewCliOp mlir) { loopMapping.erase(mlir); }
+
+ /// Mark an OpenMP loop as having been consumed.
+ void invalidateOmpLoop(Value mlir) {
+ invalidateOmpLoop(mlir.getDefiningOp<omp::NewCliOp>());
+ }
+
+ /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR
+ /// OpenMPIRBuilder CanonicalLoopInfo.
+ void mapOmpLoop(omp::NewCliOp mlir, llvm::CanonicalLoopInfo *llvm) {
+ assert(llvm && "argument must be non-null");
+ llvm::CanonicalLoopInfo *&cur = loopMapping[mlir];
+ assert(cur == nullptr && "attempting to map a loop that is already mapped");
+ cur = llvm;
+ }
+
+ /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR
+ /// OpenMPIRBuilder CanonicalLoopInfo.
+ void mapOmpLoop(Value mlir, llvm::CanonicalLoopInfo *llvm) {
+ mapOmpLoop(mlir.getDefiningOp<omp::NewCliOp>(), llvm);
+ }
+
/// Stores the mapping between an MLIR operation with successors and a
/// corresponding LLVM IR instruction.
void mapBranch(Operation *mlir, llvm::Instruction *llvm) {
@@ -381,6 +418,12 @@ class ModuleTranslation {
DenseMap<Value, llvm::Value *> valueMapping;
DenseMap<Block *, llvm::BasicBlock *> blockMapping;
+ /// List of not yet consumed MLIR loop handles (represented by an omp.new_cli
+ /// operation which creates a value of type CanonicalLoopInfoType) and their
+ /// LLVM-IR representation as CanonicalLoopInfo which is managed by the
+ /// OpenMPIRBuilder.
+ DenseMap<omp::NewCliOp, llvm::CanonicalLoopInfo *> loopMapping;
+
/// A mapping between MLIR LLVM dialect terminators and LLVM IR terminators
/// they are converted to. This allows for connecting PHI nodes to the source
/// values after all operations are converted.
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 7a0a7f86bc1e9..e77c4a0b94de9 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -42,6 +42,16 @@ template <typename T>
struct OpenMPOpConversion : public ConvertOpToLLVMPattern<T> {
using ConvertOpToLLVMPattern<T>::ConvertOpToLLVMPattern;
+ OpenMPOpConversion(LLVMTypeConverter &typeConverter,
+ PatternBenefit benefit = 1)
+ : ConvertOpToLLVMPattern<T>(typeConverter, benefit) {
+ // Operations using CanonicalLoopInfoType are lowered only by
+ // mlir::translateModuleToLLVMIR() using the OpenMPIRBuilder. Until then,
+ // the type and operations using it must be preserved.
+ typeConverter.addConversion(
+ [&](::mlir::omp::CanonicalLoopInfoType type) { return type; });
+ }
+
LogicalResult
matchAndRewrite(T op, typename T::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 7b07243c5f843..6b54d957a30f3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3090,6 +3090,67 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}
+/// Convert an omp.canonical_loop to LLVM-IR
+static LogicalResult
+convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+ llvm::OpenMPIRBuilder::LocationDescription loopLoc(builder);
+ Value loopIV = op.getInductionVar();
+ Value loopTC = op.getTripCount();
+
+ llvm::Value *llvmTC = moduleTranslation.lookupValue(loopTC);
+
+ llvm::Expected<llvm::CanonicalLoopInfo *> llvmOrError =
+ ompBuilder->createCanonicalLoop(
+ loopLoc,
+ [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *llvmIV) {
+ // Register the mapping of MLIR induction variable to LLVM-IR
+ // induction variable
+ moduleTranslation.mapValue(loopIV, llvmIV);
+
+ builder.restoreIP(ip);
+ llvm::Expected<llvm::BasicBlock *> bodyGenStatus =
+ convertOmpOpRegions(op.getRegion(), "omp.loop.region", builder,
+ moduleTranslation);
+
+ return bodyGenStatus.takeError();
+ },
+ llvmTC, "omp.loop");
+ if (!llvmOrError)
+ return op.emitError(llvm::toString(llvmOrError.takeError()));
+
+ llvm::CanonicalLoopInfo *llvmCLI = *llvmOrError;
+ llvm::IRBuilderBase::InsertPoint afterIP = llvmCLI->getAfterIP();
+ builder.restoreIP(afterIP);
+
+ // Register the mapping of MLIR loop to LLVM-IR OpenMPIRBuilder loop
+ if (Value cli = op.getCli())
+ moduleTranslation.mapOmpLoop(cli, llvmCLI);
+
+ return success();
+}
+
+/// Apply a `#pragma omp unroll` / "!$omp unroll" transformation using the
+/// OpenMPIRBuilder.
+static LogicalResult
+applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+ Value applyee = op.getApplyee();
+ assert(applyee && "Loop to apply unrolling on required");
+
+ llvm::CanonicalLoopInfo *consBuilderCLI =
+ moduleTranslation.lookupOMPLoop(applyee);
+ llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+ ompBuilder->unrollLoopHeuristic(loc.DL, consBuilderCLI);
+
+ moduleTranslation.invalidateOmpLoop(applyee);
+ return success();
+}
+
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static llvm::AtomicOrdering
convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
@@ -5961,6 +6022,23 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
// etc. and then discarded
return success();
})
+ .Case([&](omp::NewCliOp op) {
+ // Meta-operation: Doesn't do anything by itself, but used to
+ // identify a loop.
+ return success();
+ })
+ .Case([&](omp::CanonicalLoopOp op) {
+ return convertOmpCanonicalLoopOp(op, builder, moduleTranslation);
+ })
+ .Case([&](omp::UnrollHeuristicOp op) {
+ // FIXME: Handling omp.unroll_heuristic as an executable requires
+ // that the generator (e.g. omp.canonical_loop) has been seen first.
+ // For constructs that require all codegen to occur inside a callback
+ // (e.g. OpenMPIRBuilder::createParallel), all codegen of that
+ // contained region including their transformations must occur at
+ // the omp.canonical_loop.
+ return applyUnrollHeuristic(op, builder, moduleTranslation);
+ })
.Default([&](Operation *inst) {
return inst->emitError()
<< "not yet implemented: " << inst->getName();
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir
new file mode 100644
index 0000000000000..9abef003d6183
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir
@@ -0,0 +1,175 @@
+// Test lowering of standalone omp.canonical_loop
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: define void @anon_loop(
+// CHECK-SAME: ptr %[[ptr:.+]],
+// CHECK-SAME: i32 %[[tc:.+]]) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]]
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT: br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT: br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+llvm.func @anon_loop(%ptr: !llvm.ptr, %tc : i32) -> () {
+ omp.canonical_loop %iv : i32 in range(%tc) {
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+
+
+// CHECK-LABEL: define void @trivial_loop(
+// CHECK-SAME: ptr %[[ptr:.+]],
+// CHECK-SAME: i32 %[[tc:.+]]) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]]
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT: br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT: br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+llvm.func @trivial_loop(%ptr: !llvm.ptr, %tc : i32) -> () {
+ %cli = omp.new_cli
+ omp.canonical_loop(%cli) %iv : i32 in range(%tc) {
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+
+// CHECK-LABEL: define void @nested_loop(
+// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]]
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT: br label %omp_omp.loop.preheader1
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader1:
+// CHECK-NEXT: br label %omp_omp.loop.header2
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header2:
+// CHECK-NEXT: %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ]
+// CHECK-NEXT: br label %omp_omp.loop.cond3
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond3:
+// CHECK-NEXT: %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]]
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body4:
+// CHECK-NEXT: br label %omp.loop.region12
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region12:
+// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT: br label %omp.region.cont11
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont11:
+// CHECK-NEXT: br label %omp_omp.loop.inc5
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc5:
+// CHECK-NEXT: %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1
+// CHECK-NEXT: br label %omp_omp.loop.header2
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit6:
+// CHECK-NEXT: br label %omp_omp.loop.after7
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after7:
+// CHECK-NEXT: br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT: br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+llvm.func @nested_loop(%ptr: !llvm.ptr, %outer_tc : i32, %inner_tc : i32) -> () {
+ %outer_cli = omp.new_cli
+ %inner_cli = omp.new_cli
+ omp.canonical_loop(%outer_cli) %outer_iv : i32 in range(%outer_tc) {
+ omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) {
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir
new file mode 100644
index 0000000000000..0f0448e15f983
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir
@@ -0,0 +1,56 @@
+// Test lowering of the omp.unroll_heuristic
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+// CHECK-LABEL: define void @unroll_heuristic_trivial_loop(
+// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[tc:.+]]) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]]
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT: br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT: br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT: br label %omp_omp.loop.header, !llvm.loop ![[$MD1:[0-9]+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+llvm.func @unroll_heuristic_trivial_loop(%ptr: !llvm.ptr, %tc: i32) -> () {
+ %literal_cli = omp.new_cli
+ omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) {
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ omp.unroll_heuristic(%literal_cli)
+ llvm.return
+}
+
+
+// Start of metadata
+// CHECK-LABEL: !llvm.module.flags
+
+// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]}
+// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir
new file mode 100644
index 0000000000000..f82b4990e378e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir
@@ -0,0 +1,93 @@
+// Test lowering of the omp.unroll_heuristic
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+// CHECK-LABEL: define void @unroll_heuristic_nested_loop(
+// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]]
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT: br label %omp_omp.loop.preheader1
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader1:
+// CHECK-NEXT: br label %omp_omp.loop.header2
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header2:
+// CHECK-NEXT: %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ]
+// CHECK-NEXT: br label %omp_omp.loop.cond3
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond3:
+// CHECK-NEXT: %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]]
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body4:
+// CHECK-NEXT: br label %omp.loop.region12
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region12:
+// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT: br label %omp.region.cont11
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont11:
+// CHECK-NEXT: br label %omp_omp.loop.inc5
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc5:
+// CHECK-NEXT: %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1
+// CHECK-NEXT: br label %omp_omp.loop.header2, !llvm.loop ![[$MD1:[0-9]+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit6:
+// CHECK-NEXT: br label %omp_omp.loop.after7
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after7:
+// CHECK-NEXT: br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT: br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT: br label %omp_omp.loop.header, !llvm.loop ![[$MD3:[0-9]+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+llvm.func @unroll_heuristic_nested_loop(%ptr: !llvm.ptr, %outer_tc: i32, %inner_tc: i32) -> () {
+ %outer_cli = omp.new_cli
+ %inner_cli = omp.new_cli
+ omp.canonical_loop(%outer_cli) %outer_iv : i32 in range(%outer_tc) {
+ omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) {
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.unroll_heuristic(%outer_cli)
+ omp.unroll_heuristic(%inner_cli)
+ llvm.return
+}
+
+
+// Start of metadata
+// CHECK-LABEL: !llvm.module.flags
+
+// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]}
+// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"}
+// CHECK: ![[$MD3]] = distinct !{![[$MD3]], ![[$MD2]]}
+
More information about the Mlir-commits
mailing list