[llvm-branch-commits] [mlir] [mlir][omp] Add omp.tile operation (PR #160292)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Sep 24 04:14:44 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Michael Kruse (Meinersbur)
<details>
<summary>Changes</summary>
Add the `omp.tile` loop transformations for the OpenMP dialect. Used for lowering a standalone `!$omp tile` in Flang.
The pretty operation printing format is currently inconcistent between `omp.canonical_loop`, `omp.unroll_heuristic`, and `omp.tile`. Open for suggestions what the concistent format should be.
PR Stack:
* #<!-- -->160283
* #<!-- -->159773
* #<!-- -->160292 (this PR)
* #<!-- -->160298
---
Patch is 45.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160292.diff
13 Files Affected:
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td (+29)
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td (+67-2)
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+25-38)
- (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+154)
- (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+42)
- (added) mlir/test/Dialect/OpenMP/cli-tile.mlir (+138)
- (added) mlir/test/Dialect/OpenMP/invalid-tile.mlir (+119)
- (added) mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir (+101)
- (added) mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir (+190)
- (modified) mlir/test/mlir-tblgen/op-format-invalid.td (+1-1)
- (modified) mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp (+1)
- (modified) mlir/tools/mlir-tblgen/FormatGen.cpp (+1-1)
- (modified) mlir/tools/mlir-tblgen/OpFormatGen.cpp (+1)
``````````diff
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index 1eda5e4bc1618..8e43c4284d078 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -995,6 +995,35 @@ class OpenMP_NumTeamsClauseSkip<
def OpenMP_NumTeamsClause : OpenMP_NumTeamsClauseSkip<>;
+//===----------------------------------------------------------------------===//
+// V5.1: [10.1.2] `sizes` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_SizesClauseSkip<
+ bit traits = false, bit arguments = false, bit assemblyFormat = false,
+ bit description = false, bit extraClassDeclaration = false
+ > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+ extraClassDeclaration> {
+ let arguments = (ins
+ Variadic<IntLikeType>:$sizes
+ );
+
+ let optAssemblyFormat = [{
+ `sizes` `(` $sizes `:` type($sizes) `)`
+ }];
+
+ let description = [{
+ The `sizes` clauses defines the size of a grid over a multi-dimensional
+ logical iteration space. This grid is used for loop transformations such as
+ `tile` and `strip`. The size per dimension can be a variable, but only
+ values that are not at least 2 make sense. It is not specified what happens
+ when smaller values are used, but should still result in a loop nest that
+ executes each logical iteration once.
+ }];
+}
+
+def OpenMP_SizesClause : OpenMP_SizesClauseSkip<>;
+
//===----------------------------------------------------------------------===//
// V5.2: [10.1.2] `num_threads` clause
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
index bbcfb87fa03c6..5ad4e4b5b61d1 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
@@ -38,6 +38,44 @@ def OpenMP_MapBoundsType : OpenMP_Type<"MapBounds", "map_bounds_ty"> {
let summary = "Type for representing omp map clause bounds information";
}
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+ let summary = "Type for representing a reference to a canonical loop";
+ let description = [{
+ A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+ canonical loop in the same function. Values of this type are not
+ available at runtime and therefore cannot be used by the program itself,
+ i.e. an opaque type. It is similar to the transform dialect's
+ `!transform.interface` type, but instead of implementing an interface
+ for each transformation, the OpenMP dialect itself defines possible
+ operations on this type.
+
+ A value of type CanonicalLoopInfoType (in the following: CLI) value can be
+
+ 1. created by omp.new_cli.
+ 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
+ can only be associated once.
+ 3. passed to an omp loop transformation operation that modifies the loop
+ associated with the CLI. The CLI is the "applyee" and the operation is
+ the consumer. A CLI can only be consumed once.
+ 4. passed to an omp loop transformation operation to associate the cli with
+ a result of that transformation. The CLI is the "generatee" and the
+ operation is the generator.
+
+ A CLI cannot
+
+ 1. be returned from a function.
+ 2. be passed to operations that are not specifically designed to take a
+ CanonicalLoopInfoType, including AnyType.
+
+ A CLI directly corresponds to an object of
+ OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+ }];
+}
+
//===----------------------------------------------------------------------===//
// Base classes for OpenMP dialect operations.
//===----------------------------------------------------------------------===//
@@ -211,8 +249,35 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
// Doesn't actually create a C++ base class (only defines default values for
// tablegen classes that derive from this). Use LoopTransformationInterface
// instead for common operations.
-class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
- OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits) > {
+class OpenMPTransform_Op<string mnemonic,
+ list<Trait> traits = [],
+ list<OpenMP_Clause> clauses = []> :
+ OpenMP_Op<mnemonic,
+ traits = !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits),
+ clauses = clauses> {
+}
+
+// Base clause for loop transformations using the standard syntax.
+//
+// omp.opname ($generatees) <- ($applyees) clause(...) clause(...) ... <attr-dicr>
+// omp.opname ($applyees) clause(...) clause(...) ... <attr-dict>
+//
+// $generatees is optional and is assumed to be empty if omitted
+class OpenMPTransformBase_Op<string mnemonic,
+ list<Trait> traits = [],
+ list<OpenMP_Clause> clauses = []> :
+ OpenMPTransform_Op<mnemonic,
+ traits = !listconcat(traits, [AttrSizedOperandSegments]),
+ clauses = clauses> {
+
+ let arguments = !con(
+ (ins Variadic<CanonicalLoopInfoType>:$generatees,
+ Variadic<CanonicalLoopInfoType>:$applyees
+ ), clausesArgs);
+
+ let assemblyFormat = [{ custom<LoopTransformClis>($generatees, $applyees) }]
+ # clausesAssemblyFormat
+ # [{ attr-dict }];
}
#endif // OPENMP_OP_BASE
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5c77e215467e4..b73091ea0ca53 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -357,44 +357,6 @@ def SingleOp : OpenMP_Op<"single", traits = [
let hasVerifier = 1;
}
-//===---------------------------------------------------------------------===//
-// OpenMP Canonical Loop Info Type
-//===---------------------------------------------------------------------===//
-
-def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
- let summary = "Type for representing a reference to a canonical loop";
- let description = [{
- A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
- canonical loop in the same function. Values of this type are not
- available at runtime and therefore cannot be used by the program itself,
- i.e. an opaque type. It is similar to the transform dialect's
- `!transform.interface` type, but instead of implementing an interface
- for each transformation, the OpenMP dialect itself defines possible
- operations on this type.
-
- A value of type CanonicalLoopInfoType (in the following: CLI) value can be
-
- 1. created by omp.new_cli.
- 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
- can only be associated once.
- 3. passed to an omp loop transformation operation that modifies the loop
- associated with the CLI. The CLI is the "applyee" and the operation is
- the consumer. A CLI can only be consumed once.
- 4. passed to an omp loop transformation operation to associate the cli with
- a result of that transformation. The CLI is the "generatee" and the
- operation is the generator.
-
- A CLI cannot
-
- 1. be returned from a function.
- 2. be passed to operations that are not specifically designed to take a
- CanonicalLoopInfoType, including AnyType.
-
- A CLI directly corresponds to an object of
- OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
- }];
-}
-
//===---------------------------------------------------------------------===//
// OpenMP Canonical Loop Info Creation
//===---------------------------------------------------------------------===//
@@ -563,6 +525,31 @@ def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
let hasCustomAssemblyFormat = 1;
}
+//===----------------------------------------------------------------------===//
+// OpenMP tile operation
+//===----------------------------------------------------------------------===//
+
+def TileOp : OpenMPTransformBase_Op<"tile",
+ clauses = [OpenMP_SizesClause]> {
+ let summary = "OpenMP tile operation";
+ let description = [{
+ Represents the OpenMP tile directive introduced in OpenMP 5.1.
+
+ The construct partitions the logical iteration space of the affected loops
+ into equally-sized tiles, then creates two sets of nested loops. The outer
+ loops, called the grid loops, iterate over all tiles. The inner loops,
+ called the intratile loops, iterate over the logical iterations of a tile.
+ The sizes clause determines the size of a tile.
+
+ Currently, the affected loops must be rectangular (the tripcount of the
+ inner loop must not depend on any iv of an surrounding affected loop) and
+ perfectly nested (except for the innermost affected loop, no operations
+ other than the nested loop and the terminator in the loop body).
+ }] # clausesDescription;
+
+ let hasVerifier = 1;
+}
+
//===----------------------------------------------------------------------===//
// 2.8.3 Workshare Construct
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 1674891410194..f681b0346f489 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/ADT/bit.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/InterleavedRange.h"
#include <cstddef>
#include <iterator>
#include <optional>
@@ -3299,6 +3300,9 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
Value result = getResult();
auto [newCli, gen, cons] = decodeCli(result);
+ // Structured binding `gen` cannot be captured in lambdas before C++20
+ OpOperand *generator = gen;
+
// Derive the CLI variable name from its generator:
// * "canonloop" for omp.canonical_loop
// * custom name for loop transformation generatees
@@ -3317,6 +3321,24 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
.Case([&](UnrollHeuristicOp op) -> std::string {
llvm_unreachable("heuristic unrolling does not generate a loop");
})
+ .Case([&](TileOp op) -> std::string {
+ auto [generateesFirst, generateesCount] =
+ op.getGenerateesODSOperandIndexAndLength();
+ unsigned firstGrid = generateesFirst;
+ unsigned firstIntratile = generateesFirst + generateesCount / 2;
+ unsigned end = generateesFirst + generateesCount;
+ unsigned opnum = generator->getOperandNumber();
+ // In the OpenMP apply and looprange clauses, indices are 1-based
+ if (firstGrid <= opnum && opnum < firstIntratile) {
+ unsigned gridnum = opnum - firstGrid + 1;
+ return ("grid" + Twine(gridnum)).str();
+ }
+ if (firstIntratile <= opnum && opnum < end) {
+ unsigned intratilenum = opnum - firstIntratile + 1;
+ return ("intratile" + Twine(intratilenum)).str();
+ }
+ llvm_unreachable("Unexpected generatee argument");
+ })
.Default([&](Operation *op) {
assert(false && "TODO: Custom name for this operation");
return "transformed";
@@ -3545,6 +3567,138 @@ UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() {
return {0, 0};
}
+//===----------------------------------------------------------------------===//
+// TileOp
+//===----------------------------------------------------------------------===//
+
+static void printLoopTransformClis(OpAsmPrinter &p, TileOp op,
+ OperandRange generatees,
+ OperandRange applyees) {
+ if (!generatees.empty())
+ p << '(' << llvm::interleaved(generatees) << ')';
+
+ if (!applyees.empty())
+ p << " <- (" << llvm::interleaved(applyees) << ')';
+}
+
+static ParseResult parseLoopTransformClis(
+ OpAsmParser &parser,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &generateesOperands,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &applyeesOperands) {
+ if (parser.parseOptionalLess()) {
+ // Syntax 1: generatees present
+
+ if (parser.parseOperandList(generateesOperands,
+ mlir::OpAsmParser::Delimiter::Paren))
+ return failure();
+
+ if (parser.parseLess())
+ return failure();
+ } else {
+ // Syntax 2: generatees omitted
+ }
+
+ // Parse `<-` (`<` has already been parsed)
+ if (parser.parseMinus())
+ return failure();
+
+ if (parser.parseOperandList(applyeesOperands,
+ mlir::OpAsmParser::Delimiter::Paren))
+ return failure();
+
+ return success();
+}
+
+LogicalResult TileOp::verify() {
+ if (getApplyees().empty())
+ return emitOpError() << "must apply to at least one loop";
+
+ if (getSizes().size() != getApplyees().size())
+ return emitOpError() << "there must be one tile size for each applyee";
+
+ if (!getGeneratees().empty() &&
+ 2 * getSizes().size() != getGeneratees().size())
+ return emitOpError()
+ << "expecting two times the number of generatees than applyees";
+
+ DenseSet<Value> parentIVs;
+
+ Value parent = getApplyees().front();
+ for (auto &&applyee : llvm::drop_begin(getApplyees())) {
+ auto [parentCreate, parentGen, parentCons] = decodeCli(parent);
+ auto [create, gen, cons] = decodeCli(applyee);
+
+ if (!parentGen)
+ return emitOpError() << "applyee CLI has no generator";
+
+ auto parentLoop = dyn_cast_or_null<CanonicalLoopOp>(parentGen->getOwner());
+ if (!parentGen)
+ return emitOpError()
+ << "currently only supports omp.canonical_loop as applyee";
+
+ parentIVs.insert(parentLoop.getInductionVar());
+
+ if (!gen)
+ return emitOpError() << "applyee CLI has no generator";
+ auto loop = dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner());
+ if (!loop)
+ return emitOpError()
+ << "currently only supports omp.canonical_loop as applyee";
+
+ // Canonical loop must be perfectly nested, i.e. the body of the parent must
+ // only contain the omp.canonical_loop of the nested loops, and
+ // omp.terminator
+ bool isPerfectlyNested = [&]() {
+ auto &parentBody = parentLoop.getRegion();
+ if (!parentBody.hasOneBlock())
+ return false;
+ auto &parentBlock = parentBody.getBlocks().front();
+
+ auto nestedLoopIt = parentBlock.begin();
+ if (nestedLoopIt == parentBlock.end() ||
+ (&*nestedLoopIt != loop.getOperation()))
+ return false;
+
+ auto termIt = std::next(nestedLoopIt);
+ if (termIt == parentBlock.end() || !isa<TerminatorOp>(termIt))
+ return false;
+
+ if (std::next(termIt) != parentBlock.end())
+ return false;
+
+ return true;
+ }();
+ if (!isPerfectlyNested)
+ return emitOpError() << "tiled loop nest must be perfectly nested";
+
+ if (parentIVs.contains(loop.getTripCount()))
+ return emitOpError() << "tiled loop nest must be rectangular";
+
+ parent = applyee;
+ }
+
+ // TODO: The tile sizes must be computed before the loop, but checking this
+ // requires dominance analysis. For instance:
+ //
+ // %canonloop = omp.new_cli
+ // omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // // write to %x
+ // omp.terminator
+ // }
+ // %ts = llvm.load %x
+ // omp.tile <- (%canonloop) sizes(%ts : i32)
+
+ return success();
+}
+
+std::pair<unsigned, unsigned> TileOp ::getApplyeesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_applyees);
+}
+
+std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() {
+ return getODSOperandIndexAndLength(odsIndex_generatees);
+}
+
//===----------------------------------------------------------------------===//
// Critical construct (2.17.1)
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4921a1990b6e8..171ac61dd66fe 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3154,6 +3154,45 @@ applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
return success();
}
+/// Apply a `#pragma omp tile` / `!$omp tile` transformation using the
+/// OpenMPIRBuilder.
+static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation) {
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+
+ SmallVector<llvm::CanonicalLoopInfo *> translatedLoops;
+ SmallVector<llvm::Value *> translatedSizes;
+
+ for (Value size : op.getSizes()) {
+ llvm::Value *translatedSize = moduleTranslation.lookupValue(size);
+ assert(translatedSize &&
+ "sizes clause arguments must already be translated");
+ translatedSizes.push_back(translatedSize);
+ }
+
+ for (Value applyee : op.getApplyees()) {
+ llvm::CanonicalLoopInfo *consBuilderCLI =
+ moduleTranslation.lookupOMPLoop(applyee);
+ assert(applyee && "Canonical loop must already been translated");
+ translatedLoops.push_back(consBuilderCLI);
+ }
+
+ auto generatedLoops =
+ ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes);
+ if (!op.getGeneratees().empty()) {
+ for (auto [mlirLoop, genLoop] :
+ zip_equal(op.getGeneratees(), generatedLoops))
+ moduleTranslation.mapOmpLoop(mlirLoop, genLoop);
+ }
+
+ // CLIs can only be consumed once
+ for (Value applyee : op.getApplyees())
+ moduleTranslation.invalidateOmpLoop(applyee);
+
+ return success();
+}
+
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static llvm::AtomicOrdering
convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
@@ -6196,6 +6235,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
// the omp.canonical_loop.
return applyUnrollHeuristic(op, builder, moduleTranslation);
})
+ .Case([&](omp::TileOp op) {
+ return applyTile(op, builder, moduleTranslation);
+ })
.Case([&](omp::TargetAllocMemOp) {
return convertTargetAllocMemOp(*op, builder, moduleTranslation);
})
diff --git a/mlir/test/Dialect/OpenMP/cli-tile.mlir b/mlir/test/Dialect/OpenMP/cli-tile.mlir
new file mode 100644
index 0000000000000..73d54784c52b7
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-tile.mlir
@@ -0,0 +1,138 @@
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
+
+
+// Raw syntax check (MLIR output is always pretty-printed)
+// CHECK-LABEL: @omp_tile_raw(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_raw(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: %grid1 = omp.new_cli
+ %grid = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: %intratile1 = omp.new_cli
+ %intratile = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ "omp.can...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/160292
More information about the llvm-branch-commits
mailing list