[Mlir-commits] [mlir] [mlir][OpenMP] Support pure taskloop-local loop bounds (PR #190992)
Tom Eccles
llvmlistbot at llvm.org
Mon Apr 13 06:21:37 PDT 2026
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/190992
>From 5ca274d43db57f987e392ccc32e4a9004874e361 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 8 Apr 2026 16:16:53 +0100
Subject: [PATCH 1/2] [mlir][OpenMP] Support pure taskloop-local loop bounds
Follow up to https://github.com/llvm/llvm-project/pull/190827
Assisted-by: codex
---
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +-
.../mlir/Target/LLVMIR/ModuleTranslation.h | 15 +-
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 33 ++-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 254 ++++++++++++------
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 7 +-
mlir/test/Dialect/OpenMP/invalid.mlir | 14 +-
mlir/test/Dialect/OpenMP/ops.mlir | 19 ++
.../LLVMIR/openmp-taskloop-local-bounds.mlir | 65 +++++
8 files changed, 310 insertions(+), 101 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 40ec8904a136f..669dd3cd1544a 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -1017,7 +1017,9 @@ def TaskloopContextOp : OpenMP_Op<"taskloop.context", traits = [
```
The wrapped `omp.loop_nest` lower bounds, upper bounds and steps must be
- defined outside of the `omp.taskloop.context` region.
+ either defined outside of the `omp.taskloop.context` region or produced by
+ pure, regionless operations inside the region that do not depend on block
+ arguments.
For definitions of "undeferred task", "included task", "final task" and
"mergeable task", please check OpenMP Specification.
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index c67bb57985bd0..443f7c9fc2cb2 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -202,6 +202,9 @@ class ModuleTranslation {
/// in these blocks.
void forgetMapping(Region &region);
+ /// Removes the mapping for the given value.
+ void forgetMapping(Value value) { valueMapping.erase(value); }
+
/// Returns the LLVM metadata corresponding to a mlir LLVM dialect alias scope
/// attribute. Creates the metadata node if it has not been converted before.
llvm::MDNode *getOrCreateAliasScope(AliasScopeAttr aliasScopeAttr);
@@ -308,6 +311,13 @@ class ModuleTranslation {
/*recordInsertions=*/false);
}
+ /// Converts the given MLIR operation into LLVM IR using this translator. It
+ /// is up to the caller to ensure that all operands have been mapped before
+ /// calling this function.
+ LogicalResult convertOperation(Operation &op, llvm::IRBuilderBase &builder) {
+ return convertOperationImpl(op, builder, /*recordInsertions=*/false);
+ }
+
/// Converts argument and result attributes from `attrsOp` to LLVM IR
/// attributes on the `call` instruction. Returns failure if conversion fails.
/// The `immArgPositions` parameter is only relevant for intrinsics. It
@@ -381,8 +391,9 @@ class ModuleTranslation {
~ModuleTranslation();
/// Converts individual components.
- LogicalResult convertOperation(Operation &op, llvm::IRBuilderBase &builder,
- bool recordInsertions = false);
+ LogicalResult convertOperationImpl(Operation &op,
+ llvm::IRBuilderBase &builder,
+ bool recordInsertions = false);
LogicalResult convertFunctionSignatures();
LogicalResult convertFunctions();
LogicalResult convertIFuncs();
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index df74a316c7b66..20328375e8e9f 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -23,6 +23,7 @@
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Interfaces/FoldInterfaces.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -3437,19 +3438,31 @@ LogicalResult TaskloopContextOp::verifyRegions() {
if (!loopNestOp)
return failure();
- auto isDefinedInTaskloopContext = [&](Value value) {
- // A region is considered an ancestor of itself
- return region.isAncestor(value.getParentRegion());
+ std::function<bool(Value)> isValidBoundValue = [&](Value value) -> bool {
+ Region *valueRegion = value.getParentRegion();
+ // A loop bound value defined outside of the taskloop context region is
+ // valid. A region is considered an ancestor of itself.
+ if (!region.isAncestor(valueRegion))
+ return true;
+
+ Operation *defOp = value.getDefiningOp();
+ if (!defOp || defOp->getNumRegions() != 0 || !isPure(defOp))
+ return false;
+
+ return llvm::all_of(defOp->getOperands(), isValidBoundValue);
};
- auto hasTaskloopLocalBound = [&](OperandRange range) {
- return llvm::any_of(range, isDefinedInTaskloopContext);
+ auto hasUnsupportedTaskloopLocalBound = [&](OperandRange range) {
+ return llvm::any_of(range,
+ [&](Value value) { return !isValidBoundValue(value); });
};
- if (hasTaskloopLocalBound(loopNestOp.getLoopLowerBounds()) ||
- hasTaskloopLocalBound(loopNestOp.getLoopUpperBounds()) ||
- hasTaskloopLocalBound(loopNestOp.getLoopSteps())) {
- return emitOpError() << "expects loop bounds and steps to be defined "
- "outside of the taskloop.context region";
+ if (hasUnsupportedTaskloopLocalBound(loopNestOp.getLoopLowerBounds()) ||
+ hasUnsupportedTaskloopLocalBound(loopNestOp.getLoopUpperBounds()) ||
+ hasUnsupportedTaskloopLocalBound(loopNestOp.getLoopSteps())) {
+ return emitOpError()
+ << "expects loop bounds and steps to be defined outside of the "
+ "taskloop.context region or by pure, regionless operations "
+ "that do not depend on block arguments";
}
return success();
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 2e15f4de4545d..f7cda64e0f780 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -17,6 +17,7 @@
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
@@ -2874,6 +2875,171 @@ convertOmpTaskloopWrapperOp(omp::TaskloopWrapperOp loopWrapperOp,
return success();
}
+/// Look up the given value in the mapping, and if it's not there, translate its
+/// defining operation at the current builder insertion point. Only pure,
+/// regionless operations are supported because the same operation will later be
+/// translated again when the taskloop body itself is lowered.
+static llvm::Expected<llvm::Value *>
+lookupOrTranslatePureValue(Value value,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::IRBuilderBase &builder) {
+ if (llvm::Value *mapped = moduleTranslation.lookupValue(value))
+ return mapped;
+
+ Operation *defOp = value.getDefiningOp();
+ if (!defOp)
+ return llvm::make_error<llvm::StringError>(
+ "value is a block argument and is not mapped",
+ llvm::inconvertibleErrorCode());
+ if (defOp->getNumRegions() != 0 || !isPure(defOp))
+ return llvm::make_error<llvm::StringError>(
+ "unsupported op defining taskloop loop bound",
+ llvm::inconvertibleErrorCode());
+
+ SmallVector<Value> mappingsToRemove;
+ mappingsToRemove.reserve(defOp->getNumOperands() + defOp->getNumResults());
+ for (Value operand : defOp->getOperands()) {
+ if (moduleTranslation.lookupValue(operand))
+ continue;
+
+ llvm::Expected<llvm::Value *> operandOrError =
+ lookupOrTranslatePureValue(operand, moduleTranslation, builder);
+ if (!operandOrError)
+ return operandOrError.takeError();
+ moduleTranslation.mapValue(operand, *operandOrError);
+ mappingsToRemove.push_back(operand);
+ }
+
+ if (failed(moduleTranslation.convertOperation(*defOp, builder)))
+ return llvm::make_error<llvm::StringError>(
+ "failed to convert op defining taskloop loop bound",
+ llvm::inconvertibleErrorCode());
+
+ llvm::Value *result = moduleTranslation.lookupValue(value);
+ assert(result && "expected conversion of loop bound op to produce a value");
+
+ for (Value resultValue : defOp->getResults()) {
+ if (moduleTranslation.lookupValue(resultValue))
+ mappingsToRemove.push_back(resultValue);
+ }
+ for (Value mappedValue : mappingsToRemove)
+ moduleTranslation.forgetMapping(mappedValue);
+
+ return result;
+}
+
+static llvm::Error
+computeTaskloopBounds(omp::LoopNestOp loopOp, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::Value *&lbVal, llvm::Value *&ubVal,
+ llvm::Value *&stepVal) {
+ Operation::operand_range lowerBounds = loopOp.getLoopLowerBounds();
+ Operation::operand_range upperBounds = loopOp.getLoopUpperBounds();
+ Operation::operand_range steps = loopOp.getLoopSteps();
+
+ llvm::Expected<llvm::Value *> firstLbOrErr =
+ lookupOrTranslatePureValue(lowerBounds[0], moduleTranslation, builder);
+ if (!firstLbOrErr)
+ return firstLbOrErr.takeError();
+
+ llvm::Type *boundType = (*firstLbOrErr)->getType();
+ ubVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
+ if (loopOp.getCollapseNumLoops() > 1) {
+ // In cases where Collapse is used with Taskloop, the upper bound of the
+ // iteration space needs to be recalculated to cater for the collapsed loop.
+ // The Collapsed Loop UpperBound is the product of all collapsed
+ // loop's tripcount.
+ // The LowerBound for collapsed loops is always 1. When the loops are
+ // collapsed, it will reset the bounds and introduce processing to ensure
+ // the indices are presented as expected. As this happens after creating
+ // Taskloop, these bounds need predicting. Example:
+ // !$omp taskloop collapse(2)
+ // do i = 1, 10
+ // do j = 1, 5
+ // ..
+ // end do
+ // end do
+ // This loop above has a total of 50 iterations, so the lb will be 1, and
+ // the ub will be 50. collapseLoops in OMPIRBuilder then handles ensuring
+ // that i and j are properly presented when used in the loop.
+ for (uint64_t i = 0; i < loopOp.getCollapseNumLoops(); i++) {
+ llvm::Expected<llvm::Value *> lbOrErr =
+ i == 0 ? std::move(firstLbOrErr)
+ : lookupOrTranslatePureValue(lowerBounds[i], moduleTranslation,
+ builder);
+ if (!lbOrErr)
+ return lbOrErr.takeError();
+ llvm::Expected<llvm::Value *> ubOrErr = lookupOrTranslatePureValue(
+ upperBounds[i], moduleTranslation, builder);
+ if (!ubOrErr)
+ return ubOrErr.takeError();
+ llvm::Expected<llvm::Value *> stepOrErr =
+ lookupOrTranslatePureValue(steps[i], moduleTranslation, builder);
+ if (!stepOrErr)
+ return stepOrErr.takeError();
+
+ llvm::Value *loopLb = *lbOrErr;
+ llvm::Value *loopUb = *ubOrErr;
+ llvm::Value *loopStep = *stepOrErr;
+ // In some cases, such as where the ub is less than the lb so the loop
+ // steps down, the calculation for the loopTripCount is swapped. To ensure
+ // the correct value is found, calculate both UB - LB and LB - UB then
+ // select which value to use depending on how the loop has been
+ // configured.
+ llvm::Value *loopLbMinusOne = builder.CreateSub(
+ loopLb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
+ llvm::Value *loopUbMinusOne = builder.CreateSub(
+ loopUb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
+ llvm::Value *boundsCmp = builder.CreateICmpSLT(loopLb, loopUb);
+ llvm::Value *ubMinusLb = builder.CreateSub(loopUb, loopLbMinusOne);
+ llvm::Value *lbMinusUb = builder.CreateSub(loopLb, loopUbMinusOne);
+ llvm::Value *loopTripCount =
+ builder.CreateSelect(boundsCmp, ubMinusLb, lbMinusUb);
+ loopTripCount = builder.CreateBinaryIntrinsic(
+ llvm::Intrinsic::abs, loopTripCount, builder.getFalse());
+ // For loops that have a step value not equal to 1, we need to adjust the
+ // trip count to ensure the correct number of iterations for the loop is
+ // captured.
+ llvm::Value *loopTripCountDivStep =
+ builder.CreateSDiv(loopTripCount, loopStep);
+ loopTripCountDivStep = builder.CreateBinaryIntrinsic(
+ llvm::Intrinsic::abs, loopTripCountDivStep, builder.getFalse());
+ llvm::Value *loopTripCountRem =
+ builder.CreateSRem(loopTripCount, loopStep);
+ loopTripCountRem = builder.CreateBinaryIntrinsic(
+ llvm::Intrinsic::abs, loopTripCountRem, builder.getFalse());
+ llvm::Value *needsRoundUp = builder.CreateICmpNE(
+ loopTripCountRem,
+ builder.getIntN(loopTripCountRem->getType()->getIntegerBitWidth(),
+ 0));
+ loopTripCount =
+ builder.CreateAdd(loopTripCountDivStep,
+ builder.CreateZExtOrTrunc(
+ needsRoundUp, loopTripCountDivStep->getType()));
+ ubVal = builder.CreateMul(ubVal, loopTripCount);
+ }
+ lbVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
+ stepVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
+ } else {
+ llvm::Expected<llvm::Value *> ubOrErr =
+ lookupOrTranslatePureValue(upperBounds[0], moduleTranslation, builder);
+ if (!ubOrErr)
+ return ubOrErr.takeError();
+ llvm::Expected<llvm::Value *> stepOrErr =
+ lookupOrTranslatePureValue(steps[0], moduleTranslation, builder);
+ if (!stepOrErr)
+ return stepOrErr.takeError();
+ lbVal = *firstLbOrErr;
+ ubVal = *ubOrErr;
+ stepVal = *stepOrErr;
+ }
+
+ assert(lbVal != nullptr && "Expected value for lbVal");
+ assert(ubVal != nullptr && "Expected value for ubVal");
+ assert(stepVal != nullptr && "Expected value for stepVal");
+ return llvm::Error::success();
+}
+
// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
@@ -2963,6 +3129,14 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
// Set up inserttion point for call to createTaskloop()
builder.SetInsertPoint(taskloopStartBlock);
+ auto loopOp = cast<omp::LoopNestOp>(loopWrapperOp.getWrappedLoop());
+ llvm::Value *lbVal = nullptr;
+ llvm::Value *ubVal = nullptr;
+ llvm::Value *stepVal = nullptr;
+ if (llvm::Error err = computeTaskloopBounds(
+ loopOp, builder, moduleTranslation, lbVal, ubVal, stepVal))
+ return handleError(std::move(err), opInst);
+
auto bodyCB = [&](InsertPointTy allocaIP,
InsertPointTy codegenIP) -> llvm::Error {
// Save the alloca insertion point on ModuleTranslation stack for use in
@@ -3129,91 +3303,11 @@ convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
return builder.saveIP();
};
- auto loopOp = cast<omp::LoopNestOp>(loopWrapperOp.getWrappedLoop());
-
auto loopInfo = [&]() -> llvm::Expected<llvm::CanonicalLoopInfo *> {
llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
return loopInfo;
};
- Operation::operand_range lowerBounds = loopOp.getLoopLowerBounds();
- Operation::operand_range upperBounds = loopOp.getLoopUpperBounds();
- Operation::operand_range steps = loopOp.getLoopSteps();
- llvm::Type *boundType =
- moduleTranslation.lookupValue(lowerBounds[0])->getType();
- llvm::Value *lbVal = nullptr;
- llvm::Value *ubVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
- llvm::Value *stepVal = nullptr;
- if (loopOp.getCollapseNumLoops() > 1) {
- // In cases where Collapse is used with Taskloop, the upper bound of the
- // iteration space needs to be recalculated to cater for the collapsed loop.
- // The Collapsed Loop UpperBound is the product of all collapsed
- // loop's tripcount.
- // The LowerBound for collapsed loops is always 1. When the loops are
- // collapsed, it will reset the bounds and introduce processing to ensure
- // the index's are presented as expected. As this happens after creating
- // Taskloop, these bounds need predicting. Example:
- // !$omp taskloop collapse(2)
- // do i = 1, 10
- // do j = 1, 5
- // ..
- // end do
- // end do
- // This loop above has a total of 50 iterations, so the lb will be 1, and
- // the ub will be 50. collapseLoops in OMPIRBuilder then handles ensuring
- // that i and j are properly presented when used in the loop.
- for (uint64_t i = 0; i < loopOp.getCollapseNumLoops(); i++) {
- llvm::Value *loopLb = moduleTranslation.lookupValue(lowerBounds[i]);
- llvm::Value *loopUb = moduleTranslation.lookupValue(upperBounds[i]);
- llvm::Value *loopStep = moduleTranslation.lookupValue(steps[i]);
- // In some cases, such as where the ub is less than the lb so the loop
- // steps down, the calculation for the loopTripCount is swapped. To ensure
- // the correct value is found, calculate both UB - LB and LB - UB then
- // select which value to use depending on how the loop has been
- // configured.
- llvm::Value *loopLbMinusOne = builder.CreateSub(
- loopLb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
- llvm::Value *loopUbMinusOne = builder.CreateSub(
- loopUb, builder.getIntN(boundType->getIntegerBitWidth(), 1));
- llvm::Value *boundsCmp = builder.CreateICmpSLT(loopLb, loopUb);
- llvm::Value *ubMinusLb = builder.CreateSub(loopUb, loopLbMinusOne);
- llvm::Value *lbMinusUb = builder.CreateSub(loopLb, loopUbMinusOne);
- llvm::Value *loopTripCount =
- builder.CreateSelect(boundsCmp, ubMinusLb, lbMinusUb);
- loopTripCount = builder.CreateBinaryIntrinsic(
- llvm::Intrinsic::abs, loopTripCount, builder.getFalse());
- // For loops that have a step value not equal to 1, we need to adjust the
- // trip count to ensure the correct number of iterations for the loop is
- // captured.
- llvm::Value *loopTripCountDivStep =
- builder.CreateSDiv(loopTripCount, loopStep);
- loopTripCountDivStep = builder.CreateBinaryIntrinsic(
- llvm::Intrinsic::abs, loopTripCountDivStep, builder.getFalse());
- llvm::Value *loopTripCountRem =
- builder.CreateSRem(loopTripCount, loopStep);
- loopTripCountRem = builder.CreateBinaryIntrinsic(
- llvm::Intrinsic::abs, loopTripCountRem, builder.getFalse());
- llvm::Value *needsRoundUp = builder.CreateICmpNE(
- loopTripCountRem,
- builder.getIntN(loopTripCountRem->getType()->getIntegerBitWidth(),
- 0));
- loopTripCount =
- builder.CreateAdd(loopTripCountDivStep,
- builder.CreateZExtOrTrunc(
- needsRoundUp, loopTripCountDivStep->getType()));
- ubVal = builder.CreateMul(ubVal, loopTripCount);
- }
- lbVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
- stepVal = builder.getIntN(boundType->getIntegerBitWidth(), 1);
- } else {
- lbVal = moduleTranslation.lookupValue(lowerBounds[0]);
- ubVal = moduleTranslation.lookupValue(upperBounds[0]);
- stepVal = moduleTranslation.lookupValue(steps[0]);
- }
- assert(lbVal != nullptr && "Expected value for lbVal");
- assert(ubVal != nullptr && "Expected value for ubVal");
- assert(stepVal != nullptr && "Expected value for stepVal");
-
llvm::Value *ifCond = nullptr;
llvm::Value *grainsize = nullptr;
int sched = 0; // default
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index cf398f151ed0b..7818c2f9e6fc9 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -990,9 +990,8 @@ llvm::CallInst *mlir::LLVM::detail::createIntrinsicCall(
/// Given a single MLIR operation, create the corresponding LLVM IR operation
/// using the `builder`.
-LogicalResult ModuleTranslation::convertOperation(Operation &op,
- llvm::IRBuilderBase &builder,
- bool recordInsertions) {
+LogicalResult ModuleTranslation::convertOperationImpl(
+ Operation &op, llvm::IRBuilderBase &builder, bool recordInsertions) {
const LLVMTranslationDialectInterface *opIface = iface.getInterfaceFor(&op);
if (!opIface)
return op.emitError("cannot be converted to LLVM IR: missing "
@@ -1052,7 +1051,7 @@ LogicalResult ModuleTranslation::convertBlockImpl(Block &bb,
builder.SetCurrentDebugLocation(
debugTranslation->translateLoc(op.getLoc(), subprogram));
- if (failed(convertOperation(op, builder, recordInsertions)))
+ if (failed(convertOperationImpl(op, builder, recordInsertions)))
return failure();
// Set the branch weight metadata on the translated instruction.
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 98087cc43588f..2a7a7827ca6fa 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -3035,11 +3035,17 @@ func.func @omp_taskloop_invalid_composite(%lb: index, %ub: index, %step: index)
return
}
+omp.private {type = private} @taskloop.bound.privatizer : index init {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ omp.yield(%arg0 : !llvm.ptr)
+}
+
// -----
-func.func @omp_taskloop_local_loop_bounds() {
- // expected-error @below {{'omp.taskloop.context' op expects loop bounds and steps to be defined outside of the taskloop.context region}}
- omp.taskloop.context {
- %lb = arith.constant 1 : index
+func.func @omp_taskloop_local_loop_bounds_from_block_arg(%arg0: index) {
+ %c1 = arith.constant 1 : index
+ // expected-error @below {{'omp.taskloop.context' op expects loop bounds and steps to be defined outside of the taskloop.context region or by pure, regionless operations that do not depend on block arguments}}
+ omp.taskloop.context private(@taskloop.bound.privatizer %arg0 -> %arg1 : index) {
+ %lb = arith.addi %arg1, %c1 : index
%ub = arith.constant 10 : index
%step = arith.constant 1 : index
omp.taskloop.wrapper {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 869f163cb4014..90db9187a56bf 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2861,6 +2861,25 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
omp.terminator
}
+ // CHECK: omp.taskloop.context {
+ omp.taskloop.context {
+ // CHECK: %[[LB:.+]] = arith.constant 1 : i32
+ %local_lb = arith.constant 1 : i32
+ // CHECK: %[[UB:.+]] = arith.constant 10 : i32
+ %local_ub = arith.constant 10 : i32
+ // CHECK: %[[STEP:.+]] = arith.constant 1 : i32
+ %local_step = arith.constant 1 : i32
+ // CHECK: omp.taskloop.wrapper {
+ omp.taskloop.wrapper {
+ // CHECK: omp.loop_nest (%{{.+}}) : i32 = (%[[LB]]) to (%[[UB]]) step (%[[STEP]]) {
+ omp.loop_nest (%i) : i32 = (%local_lb) to (%local_ub) step (%local_step) {
+ // CHECK: omp.yield
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+
// CHECK: return
return
}
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir
new file mode 100644
index 0000000000000..8868423b9b194
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-local-bounds.mlir
@@ -0,0 +1,65 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Taskloop loop bounds defined inside omp.taskloop.context by pure operations
+// that do not depend on block arguments should be materialized in the parent
+// function before the runtime call and translated again inside the outlined
+// task.
+
+omp.private {type = private} @_QPtest_taskloop_local_bounds_private_i32 : i32
+
+// CHECK-LABEL: define void @_QPtest_taskloop_local_constants(
+llvm.func @_QPtest_taskloop_local_constants() {
+ %one_i64 = llvm.mlir.constant(1 : i64) : i64
+ %i = llvm.alloca %one_i64 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+ omp.taskloop.context private(@_QPtest_taskloop_local_bounds_private_i32 %i -> %arg0 : !llvm.ptr) {
+ %lb = llvm.mlir.constant(1 : i32) : i32
+ %ub = llvm.mlir.constant(10 : i32) : i32
+ %step = llvm.mlir.constant(1 : i32) : i32
+ omp.taskloop.wrapper {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+ llvm.store %iv, %arg0 : i32, !llvm.ptr
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK: %[[GEP_LB:.*]] = getelementptr
+// CHECK: store i64 1, ptr %[[GEP_LB]]
+// CHECK: %[[GEP_UB:.*]] = getelementptr
+// CHECK: store i64 10, ptr %[[GEP_UB]]
+// CHECK: %[[GEP_STEP:.*]] = getelementptr
+// CHECK: store i64 1, ptr %[[GEP_STEP]]
+// CHECK: call void @__kmpc_taskloop(
+
+// CHECK-LABEL: define internal void @_QPtest_taskloop_local_constants..omp_par(
+// CHECK: %[[OL_GEP_LB:.*]] = getelementptr
+// CHECK: %[[OL_LOAD_LB:.*]] = load i64, ptr %[[OL_GEP_LB]]
+// CHECK: %[[OL_GEP_UB:.*]] = getelementptr
+// CHECK: %[[OL_LOAD_UB:.*]] = load i64, ptr %[[OL_GEP_UB]]
+
+// CHECK-LABEL: define void @_QPtest_taskloop_local_derived_bound(
+llvm.func @_QPtest_taskloop_local_derived_bound() {
+ %one_i64 = llvm.mlir.constant(1 : i64) : i64
+ %i = llvm.alloca %one_i64 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
+ omp.taskloop.context private(@_QPtest_taskloop_local_bounds_private_i32 %i -> %arg0 : !llvm.ptr) {
+ %lb = llvm.mlir.constant(1 : i32) : i32
+ %ten = llvm.mlir.constant(10 : i32) : i32
+ %two = llvm.mlir.constant(2 : i32) : i32
+ %ub = llvm.add %ten, %two : i32
+ %step = llvm.mlir.constant(1 : i32) : i32
+ omp.taskloop.wrapper {
+ omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+ llvm.store %iv, %arg0 : i32, !llvm.ptr
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK: store i64 12, ptr %{{.*}}
+// CHECK: call void @__kmpc_taskloop(
>From d20b1b650b4cdde7fc5e1ed8befff2a1124f9f3a Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 13 Apr 2026 14:21:02 +0100
Subject: [PATCH 2/2] Add return type to lambda
---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 20328375e8e9f..691793b50d33d 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3451,7 +3451,7 @@ LogicalResult TaskloopContextOp::verifyRegions() {
return llvm::all_of(defOp->getOperands(), isValidBoundValue);
};
- auto hasUnsupportedTaskloopLocalBound = [&](OperandRange range) {
+ auto hasUnsupportedTaskloopLocalBound = [&](OperandRange range) -> bool {
return llvm::any_of(range,
[&](Value value) { return !isValidBoundValue(value); });
};
More information about the Mlir-commits
mailing list