[Mlir-commits] [mlir] [MLIR][OpenMP] Add canonical loop operations (PR #147061)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Mon Jul 7 06:15:17 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-openmp

Author: Michael Kruse (Meinersbur)

<details>
<summary>Changes</summary>

Add the supporting OpenMP Dialect operations, types, and interfaces for modelling OpenMP canonical loops and loop transformations.

MLIR Operations:
 * omp.new_cli
 * omp.canonical_loop

MLIR Types:
 * !omp.cli

MLIR Interfaces:
 * LoopTransformationInterface

As a first loop transformation, so that these new operations can be used in follow-up PRs (#<!-- -->144785):
 * omp.unroll_heuristic 


If there is bikeshedding to be done on the operation names, the pretty formatting of operations (incl. future transformations such as `omp.tile`), or the names of `!omp.cli` values computed by `getAsmResultNames` (e.g. `%canonloop_s0`), I think this PR would be the right place.

---

Patch is 35.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147061.diff


8 Files Affected:

- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h (+5) 
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td (+11) 
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+207) 
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td (+86) 
- (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+353) 
- (added) mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir (+50) 
- (added) mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir (+157) 
- (added) mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir (+59) 


``````````diff
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
index ab11a6094e3e7..7cf738352ba47 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
@@ -37,4 +37,9 @@
 #define GET_OP_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOps.h.inc"
 
+namespace mlir::omp {
+/// Find the omp.new_cli, generator, and consumer of a canonical loop info.
+std::tuple<NewCliOp, OpOperand *, OpOperand *> decodeCli(mlir::Value cli);
+} // namespace mlir::omp
+
 #endif // MLIR_DIALECT_OPENMP_OPENMPDIALECT_H_
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
index f3dd44d2c0717..bbcfb87fa03c6 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
@@ -204,4 +204,15 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
   let regions = !if(singleRegion, (region AnyRegion:$region), (region));
 }
 
+
+// Base class for OpenMP loop transformations (that either consume or generate
+// loops)
+//
+// Doesn't actually create a C++ base class (only defines default values for
+// tablegen classes that derive from this). Use LoopTransformationInterface
+// instead for common operations.
+class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
+      OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits)  > {
+}
+
 #endif  // OPENMP_OP_BASE
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index ac80926053a2d..703384a0680d0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -22,6 +22,7 @@ include "mlir/Dialect/OpenMP/OpenMPOpBase.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/IR/EnumAttr.td"
+include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/OpBase.td"
 include "mlir/IR/SymbolInterfaces.td"
 
@@ -356,6 +357,212 @@ def SingleOp : OpenMP_Op<"single", traits = [
   let hasVerifier = 1;
 }
 
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+  let summary = "Type for representing a reference to a canonical loop";
+  let description = [{
+    A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+    canonical loop in the same function. Values of this type are not
+    available at runtime and therefore cannot be used by the program itself,
+    i.e. an opaque type. It is similar to the transform dialect's
+    `!transform.interface` type, but instead of implementing an interface
+    for each transformation, the OpenMP dialect itself defines possible
+    operations on this type.
+
+    A value of type CanonicalLoopInfoType (in the following: CLI) can be
+
+    1. created by omp.new_cli.
+    2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
+       can only be associated once.
+    3. passed to an omp loop transformation operation that modifies the loop
+       associated with the CLI. The CLI is the "applyee" and the operation is
+       the consumer. A CLI can only be consumed once.
+    4. passed to an omp loop transformation operation to associate the cli with
+       a result of that transformation. The CLI is the "generatee" and the
+       operation is the generator.
+
+    A CLI cannot
+
+    1. be returned from a function.
+    2. be passed to operations that are not specifically designed to take a
+       CanonicalLoopInfoType, including AnyType.
+
+    A CLI directly corresponds to an object of
+    OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+  }];
+}
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Creation
+//===---------------------------------------------------------------------===//
+
+def NewCliOp : OpenMP_Op<"new_cli",
+    [DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
+  let summary = "Create a new Canonical Loop Info value.";
+  let description = [{
+    Create a new CLI that can be passed as an argument to a CanonicalLoopOp
+    and to loop transformation operations to handle dependencies between
+    loop transformation operations.
+  }];
+
+  let arguments = (ins );
+  let results = (outs CanonicalLoopInfoType:$result);
+  let assemblyFormat = [{
+      attr-dict
+  }];
+
+  let builders = [
+    OpBuilder<(ins )>,
+  ];
+
+  let hasVerifier = 1;
+}
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Operation
+//===---------------------------------------------------------------------===//
+def CanonicalLoopOp : OpenMPTransform_Op<"canonical_loop", 
+    [DeclareOpInterfaceMethods<OpAsmOpInterface, [ "getAsmBlockNames", "getAsmBlockArgumentNames"]>]> {
+  let summary = "OpenMP Canonical Loop Operation";
+  let description = [{
+    All loops that conform to OpenMP's definition of a canonical loop can be
+    simplified to a CanonicalLoopOp. In particular, there are no loop-carried
+    variables and the number of iterations it will execute is known before the
+    operation. This allows e.g. to determine the number of threads and chunks
+    the iterations space is split into before executing any iteration. More
+    restrictions may apply in cases such as (collapsed) loop nests, doacross
+    loops, etc.
+
+    In contrast to other loop operations such as `scf.for`, the number of
+    iterations is determined by only a single variable, the trip-count. The
+    induction variable value is the logical iteration number of that iteration,
+    which OpenMP defines to be between 0 and the trip-count (exclusive).
+    Loop representation having lower-bound, upper-bound, and step-size operands,
+    require passes to do more work than necessary, including handling special
+    cases such as upper-bound smaller than lower-bound, upper-bound equal to
+    the integer type's maximal value, negative step size, etc. This complexity
+    is better only handled once by the front-end and can apply its semantics
+    for such cases while still being able to represent any kind of loop, which
+    is kind of the point of a mid-end intermediate representation. User-defined
+    types such as random-access iterators in C++ could not directly be
+    represented anyway.
+
+    The induction variable is always of the same type as the tripcount argument.
+    Since it can never be negative, tripcount is always interpreted as an
+    unsigned integer. It is the caller's responsibility to ensure the tripcount
+    is not negative when its interpretation is signed, i.e.
+    `%tripcount = max(0,%tripcount)`.
+
+    An optional argument to an omp.canonical_loop that can be passed in
+    is a CanonicalLoopInfo value that can be used to refer to the canonical
+    loop to apply transformations -- such as tiling, unrolling, or
+    work-sharing -- to the loop, similar to the transform dialect but
+    with OpenMP-specific semantics. Because it is optional, it has to be the
+    last of the operands, but appears first in the pretty format printing.
+
+    The pretty assembly format is inspired by python syntax, where `range(n)`
+    returns an iterator that runs from $0$ to $n-1$. The pretty assembly syntax
+    is one of:
+
+     omp.canonical_loop(%cli) %iv : !type in range(%tripcount)
+     omp.canonical_loop       %iv : !type in range(%tripcount)
+
+    A CanonicalLoopOp is lowered to LLVM-IR using
+    `OpenMPIRBuilder::createCanonicalLoop`.
+
+    #### Examples
+
+    Translation from lower-bound, upper-bound, step-size to trip-count.
+    ```c
+    for (int i = 3; i < 42; i+=2) {
+      B[i] = A[i];
+    }
+    ```
+
+    ```mlir
+    %lb = arith.constant 3 : i32
+    %ub = arith.constant 42 : i32
+    %step = arith.constant 2 : i32
+    %range = arith.sub %ub, %lb : i32
+    %tripcount = arith.div %range, %step : i32
+    omp.canonical_loop %iv : i32 in range(%tripcount) {
+      %offset = arith.mul %iv, %step : i32
+      %i = arith.add %offset, %lb : i32
+      %a = load %arrA[%i] : memref<?xf32>
+      store %a, %arrB[%i] : memref<?xf32>
+    }
+    ```
+
+    Nested canonical loop with transformation of the inner loop.
+    ```mlir
+    %outer = omp.new_cli : !omp.cli
+    %inner = omp.new_cli : !omp.cli
+    omp.canonical_loop(%outer) %iv1 : i32 in range(%tc1) {
+      omp.canonical_loop(%inner) %iv2 : i32 in range(%tc2) {
+        %a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
+        store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
+      }
+    }
+    omp.unroll_full(%inner)
+    ```
+  }];
+
+
+  let arguments = (ins IntLikeType:$tripCount,
+                       Optional<CanonicalLoopInfoType>:$cli);
+  let regions = (region AnyRegion:$region);
+
+  let extraClassDeclaration = [{
+    ::mlir::Value getInductionVar();
+  }];
+
+  let builders = [
+    OpBuilder<(ins "::mlir::Value":$tripCount)>,
+    OpBuilder<(ins "::mlir::Value":$tripCount, "::mlir::Value":$cli)>,
+  ];
+
+  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// OpenMP unroll_heuristic operation
+//===----------------------------------------------------------------------===//
+
+def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
+  let summary = "OpenMP heuristic unroll operation";
+  let description = [{
+    Represents a `#pragma omp unroll` construct introduced in OpenMP 5.1.
+
+    The operation has one applyee and no generatees. The applyee is unrolled
+    according to implementation-defined heuristics. Implementations may choose
+    to not unroll the loop, partially unroll by a chosen factor, or fully
+    unroll it. Even if the implementation chooses to partially unroll the
+    applyee, the resulting unrolled loop is not accessible as a generatee. Use
+    omp.unroll_partial if a generatee is required.
+
+    The lowering is implemented using `OpenMPIRBuilder::unrollLoopHeuristic`,
+    which just attaches `llvm.loop.unroll.enable` metadata to the loop so the
+    unrolling is carried out by LLVM's LoopUnroll pass. That is, unrolling is
+    only actually performed in optimized builds.
+
+    Assembly formats:
+      omp.unroll_heuristic(%cli)
+      omp.unroll_heuristic(%cli) -> ()
+  }];
+
+  let arguments = (ins CanonicalLoopInfoType:$applyee);
+
+  let builders = [
+    OpBuilder<(ins "::mlir::Value":$cli)>,
+  ];
+
+  let hasCustomAssemblyFormat = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // 2.8.3 Workshare Construct
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 92bf34ef3145f..8d794918d57d5 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -551,4 +551,90 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
   ];
 }
 
+def LoopTransformationInterface : OpInterface<"LoopTransformationInterface"> {
+  let description = [{
+    Methods that are common for OpenMP loop transformation operations.
+  }];
+
+  let cppNamespace = "::mlir::omp";
+
+  let methods = [
+
+    InterfaceMethod<
+      /*description=*/[{
+        Get the indices for the arguments that represent CanonicalLoopInfo
+        applyees, i.e. loops that are transformed/consumed by this operation.
+      }],
+      /*returnType=*/ "std::pair<unsigned, unsigned>",
+      /*methodName=*/ "getApplyeesODSOperandIndexAndLength",
+      /*args=*/(ins)
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Get the indices for the arguments that represent CanonicalLoopInfo
+        generatees, i.e. loops that are created by this operation.
+      }],
+      /*returnType=*/ "std::pair<unsigned, unsigned>",
+      /*methodName=*/ "getGenerateesODSOperandIndexAndLength",
+      /*args=*/(ins)
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return the number of applyees of this loop transformation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "getNumApplyees",
+      /*args=*/       (ins),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        return  $_op.getApplyeesODSOperandIndexAndLength().second;
+      }]
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return the number of generatees of this loop transformation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "getNumGeneratees",
+      /*args=*/       (ins),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        return  $_op.getGenerateesODSOperandIndexAndLength().second;
+      }]
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return whether the provided operand is an applyee of this operation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "isApplyee",
+      /*args=*/       (ins "unsigned":$opnum),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        auto applyeeArgs = $_op.getApplyeesODSOperandIndexAndLength();
+        return (applyeeArgs.first <= opnum && opnum < applyeeArgs.first + applyeeArgs.second) ;
+      }]
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return whether the provided operand is a generatee of this operation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "isGeneratee",
+      /*args=*/       (ins "unsigned":$opnum),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        auto generateeArgs = $_op.getGenerateesODSOperandIndexAndLength();
+        return (generateeArgs.first <= opnum && opnum < generateeArgs.first + generateeArgs.second) ;
+      }]
+    >,
+
+  ];
+}
+
 #endif // OPENMP_OPS_INTERFACES
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index ffc84781f77ff..f1263af129cce 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -26,6 +26,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallString.h"
@@ -3031,6 +3032,358 @@ void LoopNestOp::gatherWrappers(
   }
 }
 
+//===----------------------------------------------------------------------===//
+// OpenMP canonical loop handling
+//===----------------------------------------------------------------------===//
+
+std::tuple<NewCliOp, OpOperand *, OpOperand *>
+mlir::omp ::decodeCli(Value cli) {
+
+  // Defining a CLI for a generated loop is optional; if there is none then
+  // there is no follow-up transformation
+  if (!cli)
+    return {{}, nullptr, nullptr};
+
+  MLIRContext *ctx = cli.getContext();
+  assert(cli.getType() == CanonicalLoopInfoType::get(ctx) &&
+         "Unexpected type of cli");
+
+  NewCliOp create = cast<NewCliOp>(cli.getDefiningOp());
+  OpOperand *gen = nullptr;
+  OpOperand *cons = nullptr;
+  for (OpOperand &use : cli.getUses()) {
+    auto op = cast<LoopTransformationInterface>(use.getOwner());
+
+    unsigned opnum = use.getOperandNumber();
+    if (op.isGeneratee(opnum)) {
+      assert(!gen && "Each CLI may have at most one def");
+      gen = &use;
+    } else if (op.isApplyee(opnum)) {
+      assert(!cons && "Each CLI may have at most one consumer");
+      cons = &use;
+    } else {
+      llvm_unreachable("Unexpected operand for a CLI");
+    }
+  }
+
+  return {create, gen, cons};
+}
+
+void NewCliOp::build(::mlir::OpBuilder &odsBuilder,
+                     ::mlir::OperationState &odsState) {
+  odsState.addTypes(CanonicalLoopInfoType::get(odsBuilder.getContext()));
+}
+
+void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
+  Value result = getResult();
+  auto [newCli, gen, cons] = decodeCli(result);
+
+  // Derive the CLI variable name from its generator:
+  //  * "canonloop" for omp.canonical_loop
+  //  * custom name for loop transformation generatees
+  //  * "cli" as fallback if no generator
+  //  * "_s<idx>" suffix for nested loops, where <idx> is the sequential order
+  //  at that level
+  //  * "_r<idx>" suffix for operations with multiple regions, where <idx> is
+  //  the index of that region
+  std::string cliName{"cli"};
+  if (gen) {
+    cliName =
+        TypeSwitch<Operation *, std::string>(gen->getOwner())
+            .Case([&](CanonicalLoopOp op) {
+              // Find the canonical loop nesting: For each ancestor add an
+              // "_s<idx>" (and possibly "_r<idx>") suffix (in reverse order)
+              SmallVector<std::string> components;
+              Operation *o = op.getOperation();
+              while (o) {
+                if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+                  break;
+
+                Region *r = o->getParentRegion();
+                if (!r)
+                  break;
+
+                auto getSequentialIndex = [](Region *r, Operation *o) {
+                  llvm::ReversePostOrderTraversal<Block *> traversal(
+                      &r->getBlocks().front());
+                  size_t idx = 0;
+                  for (Block *b : traversal) {
+                    for (Operation &op : *b) {
+                      if (&op == o)
+                        return idx;
+                      // Only consider operations that are containers as
+                      // possible children
+                      if (!op.getRegions().empty())
+                        idx += 1;
+                    }
+                  }
+                  llvm_unreachable("Operation not part of the region");
+                };
+                size_t sequentialIdx = getSequentialIndex(r, o);
+                components.push_back(("s" + Twine(sequentialIdx)).str());
+
+                Operation *parent = r->getParentOp();
+                if (!parent)
+                  break;
+
+                // If the operation has more than one region, also count in
+                // which of the regions
+                if (parent->getRegions().size() > 1) {
+                  auto getRegionIndex = [](Operation *o, Region *r) {
+                    for (auto [idx, region] :
+                         llvm::enumerate(o->getRegions())) {
+                      if (&region == r)
+                        return idx;
+                    }
+                    llvm_unreachable("Region not child its parent operation");
+                  };
+                  size_t regionIdx = getRegionIndex(parent, r);
+                  components.push_back(("r" + Twine(regionIdx)).str());
+                }
+
+                // next parent
+                o = parent;
+              }
+
+              SmallString<64> Name("canonloop");
+              for (std::string s : reverse(components)) {
+                Name += '_';
+                Name += s;
+              }
+
+              return Name;
+            })
+            .Case([&](UnrollHeuristicOp op) -> std::string {
+              llvm_unreachable("heuristic unrolling does not generate a loop");
+            })
+            .Default([&](Operation *op) {
+              assert(!"TODO: Custom name for this operation");
+              return "transformed";
+            });
+  }
+
+  setNameFn(result, cliName);
+}
+
+LogicalResult NewCliOp::verify() {
+  Value cli = getResult();
+
+  MLIRContext *ctx = cli.getContext();
+  assert(cli.getType() == CanonicalLoopInfoType::get(ctx) &&
+         "Unexpected type of cli");
+
+  // Check that the CLI is used in at most one generator and one consumer
+  OpOperand *gen = nullptr;
+  OpOperand *cons = nullptr;
+  for (mlir::OpOperand &use : cli.getUses()) {
+    auto op = cast<mlir::omp::LoopTransformationInterface>(use.getOwner());
+
+    unsigned opnum = use.getOperandNumber();
+    if (op.isGeneratee(opnum)) {
+      if (gen) {
+        InFlightDiagnostic error =
+            emitOpError("CLI must have at most one generator");
+        error.attachNote(gen->getOwner()->getLoc())
+            .appen...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/147061


More information about the Mlir-commits mailing list